Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-07-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleDescriptor
17#define ROOT7_RNTupleDescriptor
18
19#include <ROOT/RColumnModel.hxx>
20#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RSpan.hxx>
24#include <ROOT/RStringView.hxx>
25
26#include <algorithm>
27#include <chrono>
28#include <functional>
29#include <iterator>
30#include <map>
31#include <memory>
32#include <ostream>
33#include <vector>
34#include <string>
35#include <unordered_map>
36#include <unordered_set>
37
38namespace ROOT {
39namespace Experimental {
40
41class RFieldDescriptorBuilder;
42class RNTupleDescriptor;
43class RNTupleDescriptorBuilder;
44class RNTupleModel;
45
46namespace Detail {
47 class RFieldBase;
48}
49
50
51// clang-format off
52/**
53\class ROOT::Experimental::RFieldDescriptor
54\ingroup NTuple
55\brief Meta-data stored for every field of an ntuple
56*/
57// clang-format on
61
62private:
64 /// The version of the C++-type-to-column translation mechanics
66 /// The version of the C++ type itself
68 /// The leaf name, not including parent fields
69 std::string fFieldName;
70 /// Free text set by the user
71 std::string fFieldDescription;
72 /// The C++ type that was used when writing the field
73 std::string fTypeName;
74 /// The number of elements per entry for fixed-size arrays
75 std::uint64_t fNRepetitions = 0;
76 /// The structural information carried by this field in the data model tree
78 /// Establishes sub field relationships, such as classes and collections
80 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
81 /// order of sub fields.
82 std::vector<DescriptorId_t> fLinkIds;
83
84public:
85 RFieldDescriptor() = default;
86 RFieldDescriptor(const RFieldDescriptor &other) = delete;
90
91 /// In order to handle changes to the serialization routine in future ntuple versions
92 static constexpr std::uint16_t kFrameVersionCurrent = 0;
93 static constexpr std::uint16_t kFrameVersionMin = 0;
94
95 bool operator==(const RFieldDescriptor &other) const;
96 /// Get a copy of the descriptor
97 RFieldDescriptor Clone() const;
98 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
99 /// access to sub fields, which is provided by the ntuple descriptor argument.
100 std::unique_ptr<Detail::RFieldBase> CreateField(const RNTupleDescriptor &ntplDesc) const;
101
/// Returns the descriptor id of this field (fFieldId).
102 DescriptorId_t GetId() const { return fFieldId; }
/// Returns a copy of the leaf field name, which does not include parent fields.
105 std::string GetFieldName() const { return fFieldName; }
/// Returns a copy of the free-text description set by the user.
106 std::string GetFieldDescription() const { return fFieldDescription; }
/// Returns a copy of the C++ type name that was used when writing the field.
107 std::string GetTypeName() const { return fTypeName; }
/// Returns the number of elements per entry for fixed-size arrays (fNRepetitions, 0 by default).
108 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
/// Returns a const reference to the ids of the sub fields (fLinkIds), in the order in which they are stored.
/// The reference points into this descriptor and is only valid while the descriptor is alive.
111 const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
112};
113
114
115// clang-format off
116/**
117\class ROOT::Experimental::RColumnDescriptor
118\ingroup NTuple
119\brief Meta-data stored for every column of an ntuple
120*/
121// clang-format on
125
126private:
128 /// Versions can change, e.g., when new column types are added
130 /// Contains the column type and whether it is sorted
132 /// Every column belongs to one and only one field
134 /// A field can be serialized into several columns, which are numbered from zero to $n$
135 std::uint32_t fIndex;
136
137public:
138 /// In order to handle changes to the serialization routine in future ntuple versions
139 static constexpr std::uint16_t kFrameVersionCurrent = 0;
140 static constexpr std::uint16_t kFrameVersionMin = 0;
141
142 RColumnDescriptor() = default;
143 RColumnDescriptor(const RColumnDescriptor &other) = delete;
147
148 bool operator==(const RColumnDescriptor &other) const;
149 /// Get a copy of the descriptor
150 RColumnDescriptor Clone() const;
151
/// Returns the descriptor id of this column (fColumnId).
152 DescriptorId_t GetId() const { return fColumnId; }
/// Returns a copy of the column model (fModel).
154 RColumnModel GetModel() const { return fModel; }
/// Returns the number of this column among the columns its field is serialized into (fIndex).
155 std::uint32_t GetIndex() const { return fIndex; }
157};
158
159
160// clang-format off
161/**
162\class ROOT::Experimental::RClusterDescriptor
163\ingroup NTuple
164\brief Meta-data for a set of ntuple clusters
165
166The cluster descriptor might carry information of only a subset of available clusters, for instance if multiple
167files are chained and not all of them have been processed yet. Clusters usually span across all available columns but
168in some cases they can describe only a subset of the columns, for instance when describing friend ntuples.
169*/
170// clang-format on
174
175public:
176 /// The window of element indexes of a particular column in a particular cluster
179 /// A 64bit element index
181 /// A 32bit value for the number of column elements in the cluster
183 /// The usual format for ROOT compression settings (see Compression.h).
184 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
185 std::int64_t fCompressionSettings = 0;
186
187 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
188 // Should this be done on the field level?
189
190 bool operator==(const RColumnRange &other) const {
191 return fColumnId == other.fColumnId && fFirstElementIndex == other.fFirstElementIndex &&
193 }
194
/// Tells whether the given element index falls into this column range, i.e. into the half-open
/// interval [fFirstElementIndex, fFirstElementIndex + fNElements).
195 bool Contains(NTupleSize_t index) const {
196 return (fFirstElementIndex <= index && (fFirstElementIndex + fNElements) > index);
197 }
198 };
199
200 /// Records the partition of data into pages for a particular column in a particular cluster
201 struct RPageRange {
202 /// We do not need to store the element size / uncompressed page size because we know to which column
203 /// the page belongs
204 struct RPageInfo {
205 /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
207 /// The meaning of fLocator depends on the storage backend.
209
/// Two page infos are equal if both their element count and their locator compare equal.
210 bool operator==(const RPageInfo &other) const {
211 return fNElements == other.fNElements && fLocator == other.fLocator;
212 }
213 };
215 /// Index (in cluster) of the first element in page.
217 /// Page number in the corresponding RPageRange.
219
221 : RPageInfo(pi), fFirstInPage(i), fPageNo(n) {}
222 };
223
224 RPageRange() = default;
225 RPageRange(const RPageRange &other) = delete;
226 RPageRange &operator =(const RPageRange &other) = delete;
227 RPageRange(RPageRange &&other) = default;
228 RPageRange &operator =(RPageRange &&other) = default;
229
231 RPageRange clone;
232 clone.fColumnId = fColumnId;
233 clone.fPageInfos = fPageInfos;
234 return clone;
235 }
236
237 /// Find the page in the RPageRange that contains the given element. The element must exist.
238 RPageInfoExtended Find(RClusterSize::ValueType idxInCluster) const;
239
241 std::vector<RPageInfo> fPageInfos;
242
/// Two page ranges are equal if they refer to the same column id and their page info lists
/// compare equal (same pages in the same order).
243 bool operator==(const RPageRange &other) const {
244 return fColumnId == other.fColumnId && fPageInfos == other.fPageInfos;
245 }
246 };
247
248private:
250 /// Future versions of the cluster descriptor might add more meta-data, e.g. a semantic checksum
252 /// Clusters can be swapped by adjusting the entry offsets
255
256 std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
257 std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
258
259public:
260 /// In order to handle changes to the serialization routine in future ntuple versions
261 static constexpr std::uint16_t kFrameVersionCurrent = 0;
262 static constexpr std::uint16_t kFrameVersionMin = 0;
263
269
270 bool operator==(const RClusterDescriptor &other) const;
271
/// Returns the descriptor id of this cluster (fClusterId).
272 DescriptorId_t GetId() const { return fClusterId; }
/// Returns the column range stored for the given column id. The lookup uses unordered_map::at(),
/// which throws std::out_of_range if the cluster holds no range for columnId.
276 const RColumnRange &GetColumnRange(DescriptorId_t columnId) const { return fColumnRanges.at(columnId); }
/// Returns the page range stored for the given column id. The lookup uses unordered_map::at(),
/// which throws std::out_of_range if the cluster holds no page range for columnId.
277 const RPageRange &GetPageRange(DescriptorId_t columnId) const { return fPageRanges.at(columnId); }
278 bool ContainsColumn(DescriptorId_t columnId) const;
279 std::unordered_set<DescriptorId_t> GetColumnIds() const;
280 std::uint64_t GetBytesOnStorage() const;
281};
282
283
284// clang-format off
285/**
286\class ROOT::Experimental::RNTupleDescriptor
287\ingroup NTuple
288\brief The on-storage meta-data of an ntuple
289
290Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
291several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
292relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
293its location and size, and for every column the range of element indexes as well as a list of pages and page
294locations.
295
296The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
297for ntuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
298
299The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
300the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
301written struct. This allows for forward and backward compatibility when the meta-data evolves.
302*/
303// clang-format on
306
307private:
308 /// The ntuple name needs to be unique in a given storage location (file)
309 std::string fName;
310 /// Free text from the user
311 std::string fDescription;
312 /// The origin of the data
313 std::string fAuthor;
314 /// The party currently responsible for storing the data
315 std::string fCustodian;
316 /// The time stamp of the ntuple data (immutable)
317 std::chrono::system_clock::time_point fTimeStampData;
318 /// The time stamp of writing the data to storage, which gets updated when re-written
319 std::chrono::system_clock::time_point fTimeStampWritten;
320 /// The version evolves with the ntuple summary meta-data
322 /// Every NTuple gets a unique identifier
324 /// Column sets that are created as derived sets from existing NTuples share the same group id.
325 /// NTuples in the same group have the same number of entries and are supposed to contain associated data.
327
328 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
329 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
330
331 std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
332 std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
333 /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
334 /// from a chain of files
335 std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
336
337public:
338 // clang-format off
339 /**
340 \class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorIterable
341 \ingroup NTuple
342 \brief Used to loop over a field's associated columns
343 */
344 // clang-format on
346 private:
347 /// The associated NTuple for this range.
349 /// The descriptor ids of the columns ordered by index id
350 std::vector<DescriptorId_t> fColumns = {};
351 public:
352 class RIterator {
353 private:
354 /// The enclosing range's NTuple.
356 /// The enclosing range's descriptor id list.
357 const std::vector<DescriptorId_t> &fColumns;
358 std::size_t fIndex = 0;
359 public:
360 using iterator_category = std::forward_iterator_tag;
363 using difference_type = std::ptrdiff_t;
366
/// Creates an iterator over the given column id list, positioned at index.
/// The id list is bound to a reference member (fColumns), so it must outlive the iterator.
367 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
368 : fNTuple(ntuple), fColumns(columns), fIndex(index) {}
/// Pre-increment: advances the position by one.
/// NOTE(review): the result is returned by value (a copy of the advanced iterator), not by reference.
369 iterator operator++() { ++fIndex; return *this; }
/// Iterators differ if their positions differ; the referenced descriptor and id list are not compared.
371 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
/// Iterators are equal if their positions are equal; the referenced descriptor and id list are not compared.
372 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
373 };
374
376 : fNTuple(ntuple)
377 {
378 for (unsigned int i = 0; true; ++i) {
379 auto columnId = ntuple.FindColumnId(field.GetId(), i);
380 if (columnId == kInvalidDescriptorId)
381 break;
382 fColumns.emplace_back(columnId);
383 }
384 }
387 };
388
389 // clang-format off
390 /**
391 \class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorIterable
392 \ingroup NTuple
393 \brief Used to loop over a field's child fields
394 */
395 // clang-format on
397 private:
398 /// The associated NTuple for this range.
400 /// The descriptor ids of the child fields. These may be sorted using
401 /// a comparison function.
402 std::vector<DescriptorId_t> fFieldChildren = {};
403 public:
404 class RIterator {
405 private:
406 /// The enclosing range's NTuple.
408 /// The enclosing range's descriptor id list.
409 const std::vector<DescriptorId_t>& fFieldChildren;
410 std::size_t fIndex = 0;
411 public:
412 using iterator_category = std::forward_iterator_tag;
415 using difference_type = std::ptrdiff_t;
418
/// Creates an iterator over the given list of child field ids, positioned at index.
/// The id list is bound to a reference member (fFieldChildren), so it must outlive the iterator.
419 RIterator(const RNTupleDescriptor& ntuple, const std::vector<DescriptorId_t>& fieldChildren,
420 std::size_t index) : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index) {}
/// Pre-increment: advances the position by one.
/// NOTE(review): the result is returned by value (a copy of the advanced iterator), not by reference.
421 iterator operator++() { ++fIndex; return *this; }
425 );
426 }
/// Iterators differ if their positions differ; the referenced descriptor and id list are not compared.
427 bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
/// Iterators are equal if their positions are equal; the referenced descriptor and id list are not compared.
428 bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
429 };
431 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds()) {}
432 /// Sort the range using an arbitrary comparison function.
434 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator)
435 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
436 {
437 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
438 }
440 return RIterator(fNTuple, fFieldChildren, 0);
441 }
444 }
445 };
446
447 // clang-format off
448 /**
449 \class ROOT::Experimental::RNTupleDescriptor::RClusterDescriptorIterable
450 \ingroup NTuple
451 \brief Used to loop over all the clusters of an ntuple (in unspecified order)
452
453 Enumerate all cluster IDs from the cluster descriptor. No specific order can be assumed, use
454 FindNextClusterId and FindPrevClusterId to traverse clusters by entry number.
455 TODO(jblomer): review naming of *Range classes and possibly rename consistently to *Iterable
456 */
457 // clang-format on
459 private:
460 /// The associated NTuple for this range.
462 public:
463 class RIterator {
464 private:
465 /// The enclosing range's NTuple.
467 std::size_t fIndex = 0;
468 public:
469 using iterator_category = std::forward_iterator_tag;
472 using difference_type = std::ptrdiff_t;
475
/// Creates an iterator over the clusters of the given descriptor, positioned at index.
476 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
/// Pre-increment: advances the position by one.
/// NOTE(review): the result is returned by value (a copy of the advanced iterator), not by reference.
477 iterator operator++() { ++fIndex; return *this; }
479 auto it = fNTuple.fClusterDescriptors.begin();
480 std::advance(it, fIndex);
481 return it->second;
482 }
/// Iterators differ if their positions differ; the referenced descriptor is not compared.
483 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
/// Iterators are equal if their positions are equal; the referenced descriptor is not compared.
484 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
485 };
486
490 };
491
492 /// In order to handle changes to the serialization routine in future ntuple versions
493 static constexpr std::uint16_t kFrameVersionCurrent = 0;
494 static constexpr std::uint16_t kFrameVersionMin = 0;
495 /// The preamble is sufficient to get the length of the header
496 static constexpr unsigned int kNBytesPreamble = 8;
497 /// The last few bytes after the footer store the length of footer and header
498 static constexpr unsigned int kNBytesPostscript = 16;
499
500 RNTupleDescriptor() = default;
501 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
505
506 bool operator ==(const RNTupleDescriptor &other) const;
507
508 /// We deliberately do not use ROOT's built-in serialization in order to allow for use of RNTuple's without libCore
509 /// Serializes the global ntuple information as well as the column and field schemata
510 /// Returns the number of bytes and fills buffer if it is not nullptr.
511 /// TODO(jblomer): instead of runtime testing for nullptr, there should be a template for the case where
512 /// only the size of the buffer is required.
513 std::uint32_t SerializeHeader(void* buffer) const;
514 /// Serializes cluster meta data. Returns the number of bytes and fills buffer if it is not nullptr.
515 std::uint32_t SerializeFooter(void* buffer) const;
516 /// Given kNBytesPostscript bytes, extract the header and footer lengths in bytes
517 static void LocateMetadata(const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter);
518
/// Returns the size in bytes of the serialized header. This runs SerializeHeader() with a null
/// buffer, so the size is computed on every call and nothing is written.
519 std::uint32_t GetHeaderSize() const {
520 return SerializeHeader(nullptr);
521 }
/// Returns the size in bytes of the serialized footer. This runs SerializeFooter() with a null
/// buffer, so the size is computed on every call and nothing is written.
522 std::uint32_t GetFooterSize() const {
523 return SerializeFooter(nullptr);
524 }
525
/// Returns fOnDiskHeaderSize, which is set by the descriptor builder when deserialized (0 by default).
526 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
/// Returns fOnDiskFooterSize, which covers both cluster summaries and page locations (0 by default).
527 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
528
530 return fFieldDescriptors.at(fieldId);
531 }
533 return fColumnDescriptors.at(columnId);
534 }
536 return fClusterDescriptors.at(clusterId);
537 }
538
540 return RFieldDescriptorIterable(*this, fieldDesc);
541 }
543 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
544 {
545 return RFieldDescriptorIterable(*this, fieldDesc, comparator);
546 }
548 return GetFieldIterable(GetFieldDescriptor(fieldId));
549 }
551 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
552 {
553 return GetFieldIterable(GetFieldDescriptor(fieldId), comparator);
554 }
557 }
559 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
560 {
561 return GetFieldIterable(GetFieldZeroId(), comparator);
562 }
563
565 {
566 return RColumnDescriptorIterable(*this, fieldDesc);
567 }
569 {
570 return RColumnDescriptorIterable(*this, GetFieldDescriptor(fieldId));
571 }
572
574 {
575 return RClusterDescriptorIterable(*this);
576 }
577
/// Returns a copy of the ntuple name.
578 std::string GetName() const { return fName; }
/// Returns a copy of the free-text description provided by the user.
579 std::string GetDescription() const { return fDescription; }
/// Returns a copy of the author string, i.e. the origin of the data.
580 std::string GetAuthor() const { return fAuthor; }
/// Returns a copy of the custodian string, i.e. who is currently responsible for storing the data.
581 std::string GetCustodian() const { return fCustodian; }
/// Returns the (immutable) time stamp of the ntuple data.
582 std::chrono::system_clock::time_point GetTimeStampData() const { return fTimeStampData; }
/// Returns the time stamp of writing the data to storage, which gets updated when re-written.
583 std::chrono::system_clock::time_point GetTimeStampWritten() const { return fTimeStampWritten; }
/// Returns a copy of the ntuple's own identifier (fOwnUuid).
585 RNTupleUuid GetOwnUuid() const { return fOwnUuid; }
587
/// Returns the number of field descriptors held by this ntuple descriptor.
588 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
/// Returns the number of column descriptors held by this ntuple descriptor.
589 std::size_t GetNColumns() const { return fColumnDescriptors.size(); }
/// Returns the number of cluster descriptors currently held. This may be only a subset of all
/// available clusters, e.g. the clusters of the current file from a chain of files.
590 std::size_t GetNClusters() const { return fClusterDescriptors.size(); }
591
592 // The number of entries as seen with the currently loaded cluster meta-data; there might be more
595
596 /// Returns the logical parent of all top-level NTuple data fields.
599 DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const;
600 /// Searches for a top-level field
601 DescriptorId_t FindFieldId(std::string_view fieldName) const;
602 DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
606
607 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
608 /// In case of invalid field ID, an empty string is returned.
609 std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
610
611 /// Re-create the C++ model from the stored meta-data
612 std::unique_ptr<RNTupleModel> GenerateModel() const;
613 void PrintInfo(std::ostream &output) const;
614};
615
616
617// clang-format off
618/**
619\class ROOT::Experimental::RColumnDescriptorBuilder
620\ingroup NTuple
621\brief A helper class for piece-wise construction of an RColumnDescriptor
622
623Dangling column descriptors can become actual descriptors when added to an
624RNTupleDescriptorBuilder instance and then linked to their fields.
625*/
626// clang-format on
628private:
630public:
631 /// Make an empty column descriptor builder.
633
635 fColumn.fColumnId = columnId;
636 return *this;
637 }
639 fColumn.fModel = model;
640 return *this;
641 }
643 fColumn.fFieldId = fieldId;
644 return *this;
645 }
/// Sets the index of the column under construction (fColumn.fIndex).
/// Returns the builder itself so that setter calls can be chained.
646 RColumnDescriptorBuilder& Index(std::uint32_t index) {
647 fColumn.fIndex = index;
648 return *this;
649 }
651 /// Attempt to make a column descriptor. This may fail if the column
652 /// was not given enough information to make a proper descriptor.
654};
655
656
657// clang-format off
658/**
659\class ROOT::Experimental::RFieldDescriptorBuilder
660\ingroup NTuple
661\brief A helper class for piece-wise construction of an RFieldDescriptor
662
663Dangling field descriptors describe a single field in isolation. They are
664missing the necessary relationship information (parent field, any child fields)
665required to describe a real NTuple field.
666
667Dangling field descriptors can only become actual descriptors when added to an
668RNTupleDescriptorBuilder instance and then linked to other fields.
669*/
670// clang-format on
672private:
674public:
675 /// Make an empty dangling field descriptor.
677 /// Make a new RFieldDescriptorBuilder based off an existing descriptor.
678 /// Relationship information is lost during the conversion to a
679 /// dangling descriptor:
680 /// * Parent id is reset to an invalid id.
681 /// * Field children ids are forgotten.
682 ///
683 /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
684 explicit RFieldDescriptorBuilder(const RFieldDescriptor& fieldDesc);
685
686 /// Make a new RFieldDescriptorBuilder based off a live NTuple field.
688
690 fField.fFieldId = fieldId;
691 return *this;
692 }
694 fField.fFieldVersion = fieldVersion;
695 return *this;
696 }
698 fField.fTypeVersion = typeVersion;
699 return *this;
700 }
703 return *this;
704 }
/// Sets the name of the field under construction (fField.fFieldName).
/// Returns the builder itself so that setter calls can be chained.
705 RFieldDescriptorBuilder& FieldName(const std::string& fieldName) {
706 fField.fFieldName = fieldName;
707 return *this;
708 }
/// Sets the free-text description of the field under construction (fField.fFieldDescription).
/// Returns the builder itself so that setter calls can be chained.
709 RFieldDescriptorBuilder& FieldDescription(const std::string& fieldDescription) {
710 fField.fFieldDescription = fieldDescription;
711 return *this;
712 }
/// Sets the C++ type name of the field under construction (fField.fTypeName).
/// Returns the builder itself so that setter calls can be chained.
713 RFieldDescriptorBuilder& TypeName(const std::string& typeName) {
714 fField.fTypeName = typeName;
715 return *this;
716 }
/// Sets the number of repetitions of the field under construction (fField.fNRepetitions).
/// Returns the builder itself so that setter calls can be chained.
717 RFieldDescriptorBuilder& NRepetitions(std::uint64_t nRepetitions) {
718 fField.fNRepetitions = nRepetitions;
719 return *this;
720 }
722 fField.fStructure = structure;
723 return *this;
724 }
726 /// Attempt to make a field descriptor. This may fail if the dangling field
727 /// was not given enough information to make a proper descriptor.
729};
730
731
732// clang-format off
733/**
734\class ROOT::Experimental::RClusterDescriptorBuilder
735\ingroup NTuple
736\brief A helper class for piece-wise construction of an RClusterDescriptor
737
738Dangling cluster descriptors can become actual descriptors when added to an
739RNTupleDescriptorBuilder instance.
740*/
741// clang-format on
743private:
745public:
746 /// Make an empty cluster descriptor builder.
748
750 fCluster.fClusterId = clusterId;
751 return *this;
752 }
/// Sets the first entry index of the cluster under construction (fCluster.fFirstEntryIndex).
/// Returns the builder itself so that setter calls can be chained.
753 RClusterDescriptorBuilder& FirstEntryIndex(std::uint64_t firstEntryIndex) {
754 fCluster.fFirstEntryIndex = firstEntryIndex;
755 return *this;
756 }
/// Sets the number of entries of the cluster under construction (fCluster.fNEntries).
/// Returns the builder itself so that setter calls can be chained.
757 RClusterDescriptorBuilder& NEntries(std::uint64_t nEntries) {
758 fCluster.fNEntries = nEntries;
759 return *this;
760 }
761
763 std::uint64_t firstElementIndex,
764 std::uint32_t compressionSettings,
765 const RClusterDescriptor::RPageRange &pageRange);
766
767 /// Attempt to make a cluster descriptor. This may fail if the cluster
768 /// was not given enough information to make a proper descriptor.
770};
771
772
773// clang-format off
774/**
775\class ROOT::Experimental::RNTupleDescriptorBuilder
776\ingroup NTuple
777\brief A helper class for piece-wise construction of an RNTupleDescriptor
778
779Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
780*/
781// clang-format on
783private:
785 std::uint32_t fHeaderCRC32 = 0;
786 std::vector<Internal::RNTupleSerializer::RClusterSummary> fClusterSummaries;
787 std::vector<Internal::RNTupleSerializer::RClusterGroup> fClusterGroups;
788
790public:
791 /// Checks whether invariants hold:
792 /// * NTuple name is valid
793 /// * Fields have valid parent and child ids
/// Returns a const reference to the descriptor held by this builder (fDescriptor).
795 const RNTupleDescriptor& GetDescriptor() const { return fDescriptor; }
797
798 void SetNTuple(const std::string_view name, const std::string_view description, const std::string_view author,
799 const RNTupleVersion &version, const RNTupleUuid &uuid);
/// Stores the given value in fHeaderCRC32.
800 void SetHeaderCRC32(std::uint32_t crc32) { fHeaderCRC32 = crc32; }
/// Returns the value of fHeaderCRC32 (initialized to 0).
801 std::uint32_t GetHeaderCRC32() const { return fHeaderCRC32; }
802
805
806 void AddField(const RFieldDescriptor& fieldDesc);
808
809 void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId,
810 const RNTupleVersion &version, const RColumnModel &model, std::uint32_t index);
812
813 void SetFromHeader(void* headerBuffer);
814
815 void AddCluster(DescriptorId_t clusterId, RNTupleVersion version,
816 NTupleSize_t firstEntryIndex, ClusterSize_t nEntries);
819
820 void AddClustersFromFooter(void* footerBuffer);
821
826
827 /// Clears so-far stored clusters, fields, and columns and return to a pristine ntuple descriptor
828 void Reset();
829};
830
831} // namespace Experimental
832} // namespace ROOT
833
834#endif // ROOT7_RNTupleDescriptor
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
XFontStruct * id
Definition TGX11.cxx:109
char name[80]
Definition TGX11.cxx:110
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > CommitColumnRange(DescriptorId_t columnId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RClusterDescriptorBuilder()=default
Make an empty cluster descriptor builder.
RResult< RClusterDescriptor > MoveDescriptor()
Attempt to make a cluster descriptor.
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
RClusterDescriptorBuilder & ClusterId(DescriptorId_t clusterId)
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RNTupleVersion fVersion
Future versions of the cluster descriptor might add more meta-data, e.g. a semantic checksum.
bool ContainsColumn(DescriptorId_t columnId) const
RClusterDescriptor(RClusterDescriptor &&other)=default
static constexpr std::uint16_t kFrameVersionMin
RClusterDescriptor(const RClusterDescriptor &other)=delete
const RPageRange & GetPageRange(DescriptorId_t columnId) const
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
const RColumnRange & GetColumnRange(DescriptorId_t columnId) const
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
std::unordered_set< DescriptorId_t > GetColumnIds() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & ColumnId(DescriptorId_t columnId)
Meta-data stored for every column of an ntuple.
RColumnDescriptor(const RColumnDescriptor &other)=delete
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
RColumnModel fModel
Contains the column type and whether it is sorted.
static constexpr std::uint16_t kFrameVersionMin
RNTupleVersion fVersion
Versions can change, e.g., when new column types are added.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Holds the static meta-data of a column in a tree.
A field translates read and write calls from/to underlying columns to/from tree values.
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
static RFieldDescriptorBuilder FromField(const Detail::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RFieldDescriptorBuilder & FieldVersion(const RNTupleVersion &fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & TypeVersion(const RNTupleVersion &typeVersion)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
RNTupleVersion fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::unique_ptr< Detail::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string fFieldDescription
Free text set by the user.
static constexpr std::uint16_t kFrameVersionMin
std::string fFieldName
The leaf name, not including parent fields.
const std::vector< DescriptorId_t > & GetLinkIds() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RNTupleVersion fTypeVersion
The version of the C++ type itself.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
A helper class for piece-wise construction of an RNTupleDescriptor.
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
Internal::RNTupleSerializer::RClusterGroup GetClusterGroup(std::uint32_t id) const
void AddClusterSummary(Internal::RNTupleSerializer::RClusterSummary &clusterSummary)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
void AddCluster(DescriptorId_t clusterId, RNTupleVersion version, NTupleSize_t firstEntryIndex, ClusterSize_t nEntries)
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
std::vector< Internal::RNTupleSerializer::RClusterGroup > fClusterGroups
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RNTupleVersion &version, const RColumnModel &model, std::uint32_t index)
const RNTupleDescriptor & GetDescriptor() const
void Reset()
Clears the clusters, fields, and columns stored so far and returns to a pristine ntuple descriptor.
void AddClusterColumnRange(DescriptorId_t clusterId, const RClusterDescriptor::RColumnRange &columnRange)
void AddClusterGroup(Internal::RNTupleSerializer::RClusterGroup &clusterGroup)
std::vector< Internal::RNTupleSerializer::RClusterSummary > fClusterSummaries
void SetNTuple(const std::string_view name, const std::string_view description, const std::string_view author, const RNTupleVersion &version, const RNTupleUuid &uuid)
void AddClusterPageRange(DescriptorId_t clusterId, RClusterDescriptor::RPageRange &&pageRange)
void AddField(const RFieldDescriptor &fieldDesc)
Used to loop over all the clusters of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by index id.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
RNTupleUuid fGroupUuid
Column sets that are created as derived sets from existing NTuples share the same group id.
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
std::chrono::system_clock::time_point fTimeStampWritten
The time stamp of writing the data to storage, which gets updated when re-written.
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
std::uint32_t SerializeHeader(void *buffer) const
We deliberately do not use ROOT's built-in serialization in order to allow for use of RNTuple's witho...
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::chrono::system_clock::time_point GetTimeStampData() const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::string fName
The ntuple name needs to be unique in a given storage location (file)
std::uint32_t SerializeFooter(void *buffer) const
Serializes cluster meta data. Returns the number of bytes and fills buffer if it is not nullptr.
std::chrono::system_clock::time_point GetTimeStampWritten() const
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::string fAuthor
The origin of the data.
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
static constexpr std::uint16_t kFrameVersionMin
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
RNTupleVersion fVersion
The version evolves with the ntuple summary meta-data.
RFieldDescriptorIterable GetTopLevelFields(const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RClusterDescriptorIterable GetClusterIterable() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::string fCustodian
The party currently responsible for storing the data.
DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t columnId) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
static constexpr unsigned int kNBytesPreamble
The preamble is sufficient to get the length of the header.
static void LocateMetadata(const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter)
Given kNBytesPostscript bytes, extract the header and footer lengths in bytes.
std::string fDescription
Free text from the user.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
static constexpr unsigned int kNBytesPostscript
The last few bytes after the footer store the length of footer and header.
RNTupleUuid fOwnUuid
Every NTuple gets a unique identifier.
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
std::chrono::system_clock::time_point fTimeStampData
The time stamp of the ntuple data (immutable)
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
DescriptorId_t FindClusterId(DescriptorId_t columnId, NTupleSize_t index) const
For forward and backward compatibility, attach version information to the constituents of the file f...
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:195
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::string RNTupleUuid
Every NTuple is identified by a UUID. TODO(jblomer): should this be a TUUID?
constexpr ClusterSize_t kInvalidClusterIndex(std::uint32_t(-1))
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The window of element indexes of a particular column in a particular cluster.
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
RPageInfoExtended(const RPageInfo &pi, RClusterSize::ValueType i, NTupleSize_t n)
RClusterSize::ValueType fFirstInPage
Index (in cluster) of the first element in page.
NTupleSize_t fPageNo
Page number in the corresponding RPageRange.
We do not need to store the element size / uncompressed page size because we know to which column the...
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(RClusterSize::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
RPageRange(const RPageRange &other)=delete
RPageRange & operator=(const RPageRange &other)=delete
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Generic information about the physical location of data.
static void output(int code)
Definition gifencode.c:226