Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageSourceFriends.cxx
Go to the documentation of this file.
1/// \file RPageSourceFriends.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-08-10
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RCluster.hxx>
17#include <ROOT/RError.hxx>
18#include <ROOT/RLogger.hxx>
21
22#include <utility>
23
25 std::span<std::unique_ptr<RPageSource>> sources)
26 : RPageSource(ntupleName, RNTupleReadOptions()), fMetrics(std::string(ntupleName))
27{
 // The base page source is constructed with default-constructed read options.
 // Take ownership of every friend page source: each unique_ptr is moved out of the caller's span
 // (leaving the caller's entries null) and appended to fSources. The metrics of each adopted source
 // are registered with fMetrics via ObserveMetrics().
28 for (auto &s : sources) {
29 fSources.emplace_back(std::move(s));
30 fMetrics.ObserveMetrics(fSources.back()->GetMetrics());
31 }
32}
33
35
37 std::size_t originIdx,
38 const RFieldDescriptor &originField,
39 DescriptorId_t virtualParent,
40 const std::string &virtualName)
41{
 // Recursively mirrors `originField` (taken from the descriptor `originDesc` of friend number `originIdx`)
 // into the descriptor under construction in fBuilder, as a child of `virtualParent` named `virtualName`.
 // All IDs handed out here come from the running counter fNextId.

 // Clone the origin field descriptor, overriding only its ID and name.
42 auto virtualFieldId = fNextId++;
43 auto virtualField =
44 RFieldDescriptorBuilder(originField).FieldId(virtualFieldId).FieldName(virtualName).MakeDescriptor().Unwrap();
45 fBuilder.AddField(virtualField);
46 fBuilder.AddFieldLink(virtualParent, virtualFieldId);
 // Record the (source index, origin field ID) <-> virtual field ID association.
47 fIdBiMap.Insert({originIdx, originField.GetId()}, virtualFieldId);
48
 // Sub-fields keep their original names and are attached below the newly created virtual field.
49 for (const auto &f : originDesc.GetFieldIterable(originField))
50 AddVirtualField(originDesc, originIdx, f, virtualFieldId, f.GetFieldName());
51
 // Mirror the columns of the origin field. Every column receives the next free ID as its logical ID.
 // A non-alias column uses that same ID as its physical ID; an alias column instead refers to the virtual
 // ID that fColumnMap holds for the origin physical column it aliases.
 // NOTE(review): this presumably requires the aliased column to have been inserted into fColumnMap
 // already -- confirm the iteration order guarantees that.
52 for (const auto &c : originDesc.GetColumnIterable(originField)) {
53 auto physicalId = c.IsAliasColumn() ? fColumnMap.GetVirtualId({originIdx, c.GetPhysicalId()}) : fNextId;
54 RColumnDescriptorBuilder columnBuilder;
55 columnBuilder.LogicalColumnId(fNextId)
56 .PhysicalColumnId(physicalId)
57 .FieldId(virtualFieldId)
58 .BitsOnStorage(c.GetBitsOnStorage())
59 .ValueRange(c.GetValueRange())
60 .Type(c.GetType())
61 .Index(c.GetIndex())
62 .RepresentationIndex(c.GetRepresentationIndex());
63 fBuilder.AddColumn(columnBuilder.MakeDescriptor().Unwrap()).ThrowOnError();
 // fColumnMap is keyed by the origin *logical* column ID.
64 fColumnMap.Insert({originIdx, c.GetLogicalId()}, fNextId);
65 fNextId++;
66 }
67}
68
70{
 // Attaches all friend page sources and builds the combined descriptor that is returned to the caller.
 // The combined descriptor has a record field with ID 0 at its root; below it, every friend contributes
 // one sub-tree (see AddVirtualField) named after the friend's RNTuple, followed by re-numbered copies
 // of the friend's cluster groups and clusters.
 // Throws RException if the friends differ in their number of entries or if two friends share a name.
71 fBuilder.SetNTuple(fNTupleName, "");
72 fBuilder.AddField(
73 RFieldDescriptorBuilder().FieldId(0).Structure(ENTupleStructure::kRecord).MakeDescriptor().Unwrap());
74
75 for (std::size_t i = 0; i < fSources.size(); ++i) {
76 fSources[i]->Attach();
77
 // Every friend must have as many entries as the first one. On mismatch, the ID counter, both ID maps
 // and the builder are reset before throwing.
78 if (fSources[i]->GetNEntries() != fSources[0]->GetNEntries()) {
79 fNextId = 1;
80 fIdBiMap.Clear();
81 fColumnMap.Clear();
82 fBuilder.Reset();
83 throw RException(R__FAIL("mismatch in the number of entries of friend RNTuples"));
84 }
85
 // The friend's name becomes the name of its top-level virtual field (see the AddVirtualField call
 // below), so it must differ from the names of all previously processed friends.
86 auto descriptorGuard = fSources[i]->GetSharedDescriptorGuard();
87 for (unsigned j = 0; j < i; ++j) {
88 if (fSources[j]->GetSharedDescriptorGuard()->GetName() == descriptorGuard->GetName()) {
89 fNextId = 1;
90 fIdBiMap.Clear();
91 fColumnMap.Clear();
92 fBuilder.Reset();
93 throw RException(R__FAIL("duplicate names of friend RNTuples"));
94 }
95 }
 // Mirror the friend's whole field tree (starting at its zero field) below the combined root field 0.
96 AddVirtualField(descriptorGuard.GetRef(), i, descriptorGuard->GetFieldZero(), 0, descriptorGuard->GetName());
97
 // Copy the cluster group summaries, giving each a fresh virtual ID that is recorded in fIdBiMap.
98 for (const auto &cg : descriptorGuard->GetClusterGroupIterable()) {
99 auto clusterGroupBuilder = Internal::RClusterGroupDescriptorBuilder::FromSummary(cg);
100 clusterGroupBuilder.ClusterGroupId(fNextId);
101 fBuilder.AddClusterGroup(clusterGroupBuilder.MoveDescriptor().Unwrap());
102 fIdBiMap.Insert({i, cg.GetId()}, fNextId);
103 fNextId++;
104 }
105
 // Copy the clusters: same entry range, fresh virtual cluster ID, and column/page ranges re-keyed from
 // the friend's physical column IDs to the corresponding virtual column IDs.
106 for (const auto &c : descriptorGuard->GetClusterIterable()) {
107 RClusterDescriptorBuilder clusterBuilder;
108 clusterBuilder.ClusterId(fNextId).FirstEntryIndex(c.GetFirstEntryIndex()).NEntries(c.GetNEntries());
109 for (const auto &originColumnRange : c.GetColumnRangeIterable()) {
110 DescriptorId_t virtualColumnId = fColumnMap.GetVirtualId({i, originColumnRange.fPhysicalColumnId});
111 if (originColumnRange.fIsSuppressed) {
 // Suppressed columns are only booked here; their ranges are filled in by
 // CommitSuppressedColumnRanges() below.
112 clusterBuilder.MarkSuppressedColumnRange(virtualColumnId);
113 } else {
 // Clone the page range and re-label it with the virtual column ID.
114 auto pageRange = c.GetPageRange(originColumnRange.fPhysicalColumnId).Clone();
115 pageRange.fPhysicalColumnId = virtualColumnId;
116
117 auto firstElementIndex = originColumnRange.fFirstElementIndex;
118 auto compressionSettings = originColumnRange.fCompressionSettings;
119
120 clusterBuilder.CommitColumnRange(virtualColumnId, firstElementIndex, compressionSettings, pageRange);
121 }
122 }
123 clusterBuilder.CommitSuppressedColumnRanges(fBuilder.GetDescriptor()).ThrowOnError();
124 fBuilder.AddCluster(clusterBuilder.MoveDescriptor().Unwrap());
125 fIdBiMap.Insert({i, c.GetId()}, fNextId);
126 fNextId++;
127 }
128 }
129
130 fBuilder.EnsureValidDescriptor();
131 return fBuilder.MoveDescriptor();
132}
133
133
134std::unique_ptr<ROOT::Experimental::Internal::RPageSource>
136{
 // Clone every underlying friend source and wrap the clones in a new RPageSourceFriends that carries
 // the same RNTuple name.
137 std::vector<std::unique_ptr<RPageSource>> cloneSources;
138 cloneSources.reserve(fSources.size());
139 for (const auto &f : fSources)
140 cloneSources.emplace_back(f->Clone());
141 auto clone = std::make_unique<RPageSourceFriends>(fNTupleName, cloneSources);
 // Copy the virtual <-> origin ID maps so the clone translates IDs exactly like this object.
142 clone->fIdBiMap = fIdBiMap;
143 clone->fColumnMap = fColumnMap;
144 return clone;
145}
146
149{
 // Translate the virtual field ID into (source index, origin field ID) and register the column with
 // the friend source that actually stores the field ...
150 auto originFieldId = fIdBiMap.GetOriginId(fieldId);
151 fSources[originFieldId.fSourceIdx]->AddColumn(originFieldId.fId, column);
 // ... then register it with the base class under the virtual field ID; that handle is what is returned.
152 return RPageSource::AddColumn(fieldId, column);
153}
154
156{
 // Unregister from the base class first, while the handle still carries the virtual column ID.
157 RPageSource::DropColumn(columnHandle);
 // Then rewrite the (by-value) handle to the origin column ID and unregister it from the owning friend.
158 auto originColumnId = fColumnMap.GetOriginId(columnHandle.fPhysicalId);
159 columnHandle.fPhysicalId = originColumnId.fId;
160 fSources[originColumnId.fSourceIdx]->DropColumn(columnHandle);
161}
162
165{
 // Overload addressing the element by global index.
 // Rewrite the handle so that it carries the origin column ID understood by the owning friend source.
166 auto virtualColumnId = columnHandle.fPhysicalId;
167 auto originColumnId = fColumnMap.GetOriginId(virtualColumnId);
168 columnHandle.fPhysicalId = originColumnId.fId;
169
170 auto pageRef = fSources[originColumnId.fSourceIdx]->LoadPage(columnHandle, globalIndex);
171 // Suppressed column
172 if (!pageRef.Get().IsValid())
173 return RPageRef();
174
 // The returned page is labelled with the friend's own cluster ID; map it to the virtual cluster ID and
 // re-label the page with virtual column and cluster IDs before handing it out.
175 auto virtualClusterId = fIdBiMap.GetVirtualId({originColumnId.fSourceIdx, pageRef.Get().GetClusterInfo().GetId()});
176 pageRef.ChangeIds(virtualColumnId, virtualClusterId);
177
178 return pageRef;
179}
180
183{
 // Overload addressing the element by (cluster ID, index within cluster).
 // Both the column ID in the handle and the cluster ID in the cluster index are virtual and must be
 // translated to the owning friend's IDs; the in-cluster index is passed through unchanged.
184 auto virtualColumnId = columnHandle.fPhysicalId;
185 auto originColumnId = fColumnMap.GetOriginId(virtualColumnId);
186 RClusterIndex originClusterIndex(fIdBiMap.GetOriginId(clusterIndex.GetClusterId()).fId, clusterIndex.GetIndex());
187 columnHandle.fPhysicalId = originColumnId.fId;
188
189 auto pageRef = fSources[originColumnId.fSourceIdx]->LoadPage(columnHandle, originClusterIndex);
190 // Suppressed column
191 if (!pageRef.Get().IsValid())
192 return RPageRef();
193
 // Re-label the page with the virtual IDs the caller asked for.
194 pageRef.ChangeIds(virtualColumnId, clusterIndex.GetClusterId());
195 return pageRef;
196}
197
199 RClusterIndex clusterIndex,
200 RSealedPage &sealedPage)
201{
 // Reads a sealed page on behalf of the friend source that owns the column.
 // `physicalColumnId` and the cluster ID inside `clusterIndex` are virtual IDs of the combined descriptor;
 // both are translated to the owning friend's IDs, while the in-cluster index is passed through unchanged.
 // `sealedPage` is forwarded untouched and filled by the friend source.
202 auto originColumnId = fColumnMap.GetOriginId(physicalColumnId);
203 RClusterIndex originClusterIndex(fIdBiMap.GetOriginId(clusterIndex.GetClusterId()).fId, clusterIndex.GetIndex());
204
 // Forward the *origin* column ID: the friend source knows nothing about virtual IDs. This matches what
 // both LoadPage() overloads and DropColumn() do with the column handle.
205 fSources[originColumnId.fSourceIdx]->LoadSealedPage(originColumnId.fId, originClusterIndex, sealedPage);
206}
207
208std::vector<std::unique_ptr<ROOT::Experimental::Internal::RCluster>>
210{
211 // The virtual friends page source does not pre-load any clusters itself. However, the underlying page sources
212 // that are combined may well do it.
 // The result therefore holds one null pointer per requested cluster key.
213 return std::vector<std::unique_ptr<ROOT::Experimental::Internal::RCluster>>(clusterKeys.size());
214}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
void ObserveMetrics(RNTupleMetrics &observee)
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RClusterDescriptorBuilder & ClusterId(DescriptorId_t clusterId)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
RResult< void > MarkSuppressedColumnRange(DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & PhysicalColumnId(DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & Type(EColumnType type)
RColumnDescriptorBuilder & BitsOnStorage(std::uint16_t bitsOnStorage)
RColumnDescriptorBuilder & RepresentationIndex(std::uint16_t representationIndex)
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & LogicalColumnId(DescriptorId_t logicalColumnId)
RColumnDescriptorBuilder & ValueRange(double min, double max)
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
Definition RColumn.hxx:40
A helper class for piece-wise construction of an RFieldDescriptor.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
Reference to a page stored in the page pool.
Definition RPagePool.hxx:93
std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys) final
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
RPageSourceFriends(std::string_view ntupleName, std::span< std::unique_ptr< RPageSource > > sources)
void LoadSealedPage(DescriptorId_t physicalColumnId, RClusterIndex clusterIndex, RSealedPage &sealedPage) final
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, RColumn &column) final
Register a new column.
std::vector< std::unique_ptr< RPageSource > > fSources
RPageRef LoadPage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
RNTupleDescriptor AttachImpl() final
LoadStructureImpl() has been called before AttachImpl() is called
void DropColumn(ColumnHandle_t columnHandle) final
Unregisters a column.
std::unique_ptr< RPageSource > CloneImpl() const final
Returns a new, unattached page source for the same data set.
void AddVirtualField(const RNTupleDescriptor &originDesc, std::size_t originIdx, const RFieldDescriptor &originField, DescriptorId_t virtualParent, const std::string &virtualName)
Abstract interface to read data from an ntuple.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, RColumn &column) override
Register a new column.
void DropColumn(ColumnHandle_t columnHandle) override
Unregisters a column.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Meta-data stored for every field of an ntuple.
The on-storage meta-data of an ntuple.
RColumnDescriptorIterable GetColumnIterable() const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
Common user-tunable settings for reading ntuples.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
A sealed page contains the bytes of a page as written to storage (packed & compressed).