Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorage.cxx
Go to the documentation of this file.
1/// \file RPageStorage.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RPageStorage.hxx>
18#include <ROOT/RColumn.hxx>
19#include <ROOT/RField.hxx>
22#include <ROOT/RNTupleModel.hxx>
23#include <ROOT/RPagePool.hxx>
25#include <ROOT/RStringView.hxx>
26
27#include <Compression.h>
28#include <TError.h>
29
30#include <unordered_map>
31#include <utility>
32
33
35{
36}
37
39{
40}
41
43{
44 static RNTupleMetrics metrics("");
45 return metrics;
46}
47
48
49//------------------------------------------------------------------------------
50
51
53 : RPageStorage(name), fOptions(options)
54{
55}
56
58{
59}
60
/// Factory for page sources.
/// Forwards ntupleName, location and options unchanged to the RPageSourceFile constructor and returns the
/// new object through the declared std::unique_ptr<RPageSource> return type.
/// NOTE(review): as written here, an RPageSourceFile is always created; `location` is only passed through
/// and is not inspected to choose between different backends.
61std::unique_ptr<ROOT::Experimental::Detail::RPageSource> ROOT::Experimental::Detail::RPageSource::Create(
62 std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options)
63{
64 return std::make_unique<RPageSourceFile>(ntupleName, location, options);
65}
66
69{
71 auto columnId = fDescriptor.FindColumnId(fieldId, column.GetIndex());
73 fActiveColumns.emplace(columnId);
74 return ColumnHandle_t{columnId, &column};
75}
76
78{
79 fActiveColumns.erase(columnHandle.fId);
80}
81
83{
84 return fDescriptor.GetNEntries();
85}
86
88{
89 return fDescriptor.GetNElements(columnHandle.fId);
90}
91
93{
94 // TODO(jblomer) distinguish trees
95 return columnHandle.fId;
96}
97
99{
100 if (fTaskScheduler)
101 UnzipClusterImpl(cluster);
102}
103
104
105//------------------------------------------------------------------------------
106
107
109 : RPageStorage(name), fOptions(options)
110{
111}
112
114{
115}
116
/// Factory for page sinks.
/// Forwards ntupleName, location and options unchanged to the RPageSinkFile constructor and returns the
/// new object through the declared std::unique_ptr<RPageSink> return type.
/// NOTE(review): as written here, an RPageSinkFile is always created; `location` is only passed through
/// and is not inspected to choose between different backends.
117std::unique_ptr<ROOT::Experimental::Detail::RPageSink> ROOT::Experimental::Detail::RPageSink::Create(
118 std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options)
119{
120 return std::make_unique<RPageSinkFile>(ntupleName, location, options);
121}
122
125{
126 auto columnId = fLastColumnId++;
127 fDescriptorBuilder.AddColumn(columnId, fieldId, column.GetVersion(), column.GetModel(), column.GetIndex());
128 return ColumnHandle_t{columnId, &column};
129}
130
131
133{
134 fDescriptorBuilder.SetNTuple(fNTupleName, model.GetDescription(), "undefined author",
135 model.GetVersion(), model.GetUuid());
136
137 std::unordered_map<const RFieldBase *, DescriptorId_t> fieldPtr2Id; // necessary to find parent field ids
138 const auto &fieldZero = *model.GetFieldZero();
139 fDescriptorBuilder.AddField(
141 .FieldId(fLastFieldId)
142 .MakeDescriptor()
143 .Unwrap()
144 );
145 fieldPtr2Id[&fieldZero] = fLastFieldId++;
146 for (auto& f : *model.GetFieldZero()) {
147 fDescriptorBuilder.AddField(
149 .FieldId(fLastFieldId)
151 .Unwrap()
152 );
153 fDescriptorBuilder.AddFieldLink(fieldPtr2Id[f.GetParent()], fLastFieldId);
154 Detail::RFieldFuse::Connect(fLastFieldId, *this, f); // issues in turn one or several calls to AddColumn()
155 fieldPtr2Id[&f] = fLastFieldId++;
156 }
157
158 auto nColumns = fLastColumnId;
159 for (DescriptorId_t i = 0; i < nColumns; ++i) {
161 columnRange.fColumnId = i;
162 columnRange.fFirstElementIndex = 0;
163 columnRange.fNElements = 0;
164 columnRange.fCompressionSettings = fOptions.GetCompression();
165 fOpenColumnRanges.emplace_back(columnRange);
167 pageRange.fColumnId = i;
168 fOpenPageRanges.emplace_back(std::move(pageRange));
169 }
170
171 CreateImpl(model);
172}
173
174
176{
177 auto locator = CommitPageImpl(columnHandle, page);
178
179 auto columnId = columnHandle.fId;
180 fOpenColumnRanges[columnId].fNElements += page.GetNElements();
182 pageInfo.fNElements = page.GetNElements();
183 pageInfo.fLocator = locator;
184 fOpenPageRanges[columnId].fPageInfos.emplace_back(pageInfo);
185}
186
187
189{
190 auto locator = CommitClusterImpl(nEntries);
191
192 R__ASSERT((nEntries - fPrevClusterNEntries) < ClusterSize_t(-1));
193 fDescriptorBuilder.AddCluster(fLastClusterId, RNTupleVersion(), fPrevClusterNEntries,
194 ClusterSize_t(nEntries - fPrevClusterNEntries));
195 fDescriptorBuilder.SetClusterLocator(fLastClusterId, locator);
196 for (auto &range : fOpenColumnRanges) {
197 fDescriptorBuilder.AddClusterColumnRange(fLastClusterId, range);
198 range.fFirstElementIndex += range.fNElements;
199 range.fNElements = 0;
200 }
201 for (auto &range : fOpenPageRanges) {
203 std::swap(fullRange, range);
204 range.fColumnId = fullRange.fColumnId;
205 fDescriptorBuilder.AddClusterPageRange(fLastClusterId, std::move(fullRange));
206 }
207 ++fLastClusterId;
208 fPrevClusterNEntries = nEntries;
209}
#define f(i)
Definition RSha256.hxx:104
#define R__ASSERT(e)
Definition TError.h:120
char name[80]
Definition TGX11.cxx:110
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:154
const RColumnModel & GetModel() const
Definition RColumn.hxx:232
std::uint32_t GetIndex() const
Definition RColumn.hxx:233
RNTupleVersion GetVersion() const
Definition RColumn.hxx:238
NTupleSize_t GetNElements() const
Definition RColumn.hxx:230
static void Connect(DescriptorId_t fieldId, RPageStorage &pageStorage, RFieldBase &field)
Definition RField.cxx:118
A collection of Counter objects with a name, a unit, and a description.
RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options)
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
void CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
void DropColumn(ColumnHandle_t columnHandle) final
Unregisters a column.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions)
void UnzipCluster(RCluster *cluster)
Parallel decompression and unpacking of the pages in the given cluster.
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
Common functionality of an ntuple storage for both reading and writing.
virtual RNTupleMetrics & GetMetrics()
Returns an empty metrics object. Page storage implementations usually have their own metrics.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
ClusterSize_t::ValueType GetNElements() const
Definition RPage.hxx:83
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
static RDanglingFieldDescriptor FromField(const Detail::RFieldBase &field)
Make a new RDanglingFieldDescriptor based off a live NTuple field.
RDanglingFieldDescriptor & FieldId(DescriptorId_t fieldId)
The RNTupleModel encapsulates the schema of an ntuple.
RNTupleVersion GetVersion() const
RFieldZero * GetFieldZero() const
Common user-tunable settings for reading ntuples.
For forward and backward compatibility, attach version information to the constituents of the file f...
Common user-tunable settings for storing ntuples.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
RClusterSize ClusterSize_t
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
constexpr DescriptorId_t kInvalidDescriptorId
The window of element indexes of a particular column in a particular cluster.
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
We do not need to store the element size / uncompressed page size because we know to which column the...
RLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Records the partition of data into pages for a particular column in a particular cluster.