Logo ROOT  
Reference Guide
RPageStorageRoot.cxx
Go to the documentation of this file.
1/// \file RPageStorageRoot.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RField.hxx>
18#include <ROOT/RNTupleModel.hxx>
19#include <ROOT/RPage.hxx>
21#include <ROOT/RPagePool.hxx>
23#include <ROOT/RLogger.hxx>
24
25#include <TKey.h>
26
27#include <cstdlib>
28#include <iostream>
29#include <utility>
30
namespace {

/// Names of the keys under which the ntuple's objects are stored in its directory.
/// The header and footer blobs are written under fixed key names. Page payload keys are
/// composed as kKeyPagePayload + <cluster id> + kKeySeparator + <page index>.
/// Entities in an unnamed namespace already have internal linkage, so no `static` is needed.
constexpr const char *kKeySeparator = "_";
constexpr const char *kKeyNTupleFooter = "NTPLF";
constexpr const char *kKeyNTupleHeader = "NTPLH";
constexpr const char *kKeyPagePayload = "NTPLP";

} // anonymous namespace
39
41 const RNTupleWriteOptions &options)
42 : RPageSink(ntupleName, options)
43 , fMetrics("RPageSinkRoot")
44 , fPageAllocator(std::make_unique<RPageAllocatorHeap>())
45{
46 R__WARNING_HERE("NTuple") << "The RNTuple file format will change. " <<
47 "Do not store real data with this version of RNTuple!";
48 fFile = std::unique_ptr<TFile>(TFile::Open(std::string(path).c_str(), "RECREATE"));
49 fFile->SetCompressionSettings(fOptions.GetCompression());
50}
51
53{
54 if (fFile)
55 fFile->Close();
56}
57
59{
60 fDirectory = fFile->mkdir(fNTupleName.c_str());
61
62 const auto &descriptor = fDescriptorBuilder.GetDescriptor();
63 auto szHeader = descriptor.SerializeHeader(nullptr);
64 auto buffer = new unsigned char[szHeader];
65 descriptor.SerializeHeader(buffer);
66 ROOT::Experimental::Internal::RNTupleBlob blob(szHeader, buffer);
67 fDirectory->WriteObject(&blob, kKeyNTupleHeader);
68 delete[] buffer;
69}
70
73{
74 unsigned char *buffer = reinterpret_cast<unsigned char *>(page.GetBuffer());
75 auto packedBytes = page.GetSize();
76 auto element = columnHandle.fColumn->GetElement();
77 const auto isMappable = element->IsMappable();
78
79 if (!isMappable) {
80 packedBytes = (page.GetNElements() * element->GetBitsOnStorage() + 7) / 8;
81 buffer = new unsigned char[packedBytes];
82 element->Pack(buffer, page.GetBuffer(), page.GetNElements());
83 }
84
85 ROOT::Experimental::Internal::RNTupleBlob pagePayload(packedBytes, buffer);
86 std::string keyName = std::string(kKeyPagePayload) +
87 std::to_string(fLastClusterId) + kKeySeparator +
88 std::to_string(fLastPageIdx);
89 fDirectory->WriteObject(&pagePayload, keyName.c_str());
90
91 if (!isMappable) {
92 delete[] buffer;
93 }
94
96 result.fPosition = fLastPageIdx++;
97 result.fBytesOnStorage = packedBytes;
98 return result;
99}
100
103{
104 fLastPageIdx = 0;
106}
107
109{
110 if (!fDirectory)
111 return;
112
113 const auto &descriptor = fDescriptorBuilder.GetDescriptor();
114 auto szFooter = descriptor.SerializeFooter(nullptr);
115 auto buffer = new unsigned char[szFooter];
116 descriptor.SerializeFooter(buffer);
117 ROOT::Experimental::Internal::RNTupleBlob footerBlob(szFooter, buffer);
118 fDirectory->WriteObject(&footerBlob, kKeyNTupleFooter);
119 delete[] buffer;
120}
121
124{
125 if (nElements == 0)
126 nElements = kDefaultElementsPerPage;
127 auto elementSize = columnHandle.fColumn->GetElement()->GetSize();
128 return fPageAllocator->NewPage(columnHandle.fId, elementSize, nElements);
129}
130
132{
133 fPageAllocator->DeletePage(page);
134}
135
136
137////////////////////////////////////////////////////////////////////////////////
138
139
141 ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
142{
143 RPage newPage(columnId, mem, elementSize * nElements, elementSize);
144 newPage.TryGrow(nElements);
145 return newPage;
146}
147
150{
151 if (page.IsNull())
152 return;
153 R__ASSERT(page.GetBuffer() == payload->fContent);
154 free(payload->fContent);
155 delete payload;
156}
157
158
159////////////////////////////////////////////////////////////////////////////////
160
161
163 const RNTupleReadOptions &options)
164 : RPageSource(ntupleName, options)
165 , fMetrics("RPageSourceRoot")
166 , fPageAllocator(std::make_unique<RPageAllocatorKey>())
167 , fPagePool(std::make_shared<RPagePool>())
168{
169 fFile = std::unique_ptr<TFile>(TFile::Open(std::string(path).c_str(), "READ"));
170}
171
172
174{
175 if (fFile)
176 fFile->Close();
177}
178
179
181{
182 fDirectory = fFile->GetDirectory(fNTupleName.c_str());
183 RNTupleDescriptorBuilder descBuilder;
184
185 auto keyRawNTupleHeader = fDirectory->GetKey(kKeyNTupleHeader);
186 auto ntupleRawHeader = keyRawNTupleHeader->ReadObject<ROOT::Experimental::Internal::RNTupleBlob>();
187 descBuilder.SetFromHeader(ntupleRawHeader->fContent);
188 free(ntupleRawHeader->fContent);
189 delete ntupleRawHeader;
190
191 auto keyRawNTupleFooter = fDirectory->GetKey(kKeyNTupleFooter);
192 auto ntupleRawFooter = keyRawNTupleFooter->ReadObject<ROOT::Experimental::Internal::RNTupleBlob>();
193 descBuilder.AddClustersFromFooter(ntupleRawFooter->fContent);
194 free(ntupleRawFooter->fContent);
195 delete ntupleRawFooter;
196
197 return descBuilder.MoveDescriptor();
198}
199
200
202 ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType clusterIndex)
203{
204 auto columnId = columnHandle.fId;
205 auto clusterId = clusterDescriptor.GetId();
206 const auto &pageRange = clusterDescriptor.GetPageRange(columnId);
207
208 // TODO(jblomer): binary search
210 decltype(clusterIndex) firstInPage = 0;
211 for (const auto &pi : pageRange.fPageInfos) {
212 if (firstInPage + pi.fNElements > clusterIndex) {
213 pageInfo = pi;
214 break;
215 }
216 firstInPage += pi.fNElements;
217 }
218 R__ASSERT(firstInPage <= clusterIndex);
219 R__ASSERT((firstInPage + pageInfo.fNElements) > clusterIndex);
220
221 //printf("Populating page %lu/%lu [%lu] for column %d starting at %lu\n", clusterId, pageInCluster, pageIdx, columnId, firstInPage);
222
223 std::string keyName = std::string(kKeyPagePayload) +
224 std::to_string(clusterId) + kKeySeparator +
225 std::to_string(pageInfo.fLocator.fPosition);
226 auto pageKey = fDirectory->GetKey(keyName.c_str());
227 auto pagePayload = pageKey->ReadObject<ROOT::Experimental::Internal::RNTupleBlob>();
228
229 unsigned char *buffer = pagePayload->fContent;
230 auto element = columnHandle.fColumn->GetElement();
231 auto elementSize = element->GetSize();
232 if (!element->IsMappable()) {
233 auto pageSize = elementSize * pageInfo.fNElements;
234 buffer = reinterpret_cast<unsigned char *>(malloc(pageSize));
235 R__ASSERT(buffer != nullptr);
236 element->Unpack(buffer, pagePayload->fContent, pageInfo.fNElements);
237 free(pagePayload->fContent);
238 pagePayload->fContent = buffer;
239 pagePayload->fSize = pageSize;
240 }
241
242 auto indexOffset = clusterDescriptor.GetColumnRange(columnId).fFirstElementIndex;
243 auto newPage = fPageAllocator->NewPage(columnId, pagePayload->fContent, elementSize, pageInfo.fNElements);
244 newPage.SetWindow(indexOffset + firstInPage, RPage::RClusterInfo(clusterId, indexOffset));
245 fPagePool->RegisterPage(newPage,
246 RPageDeleter([](const RPage &page, void *userData)
247 {
249 }, pagePayload));
250 return newPage;
251}
252
253
255 ColumnHandle_t columnHandle, NTupleSize_t globalIndex)
256{
257 auto columnId = columnHandle.fId;
258 auto cachedPage = fPagePool->GetPage(columnId, globalIndex);
259 if (!cachedPage.IsNull())
260 return cachedPage;
261
262 auto clusterId = fDescriptor.FindClusterId(columnId, globalIndex);
263 R__ASSERT(clusterId != kInvalidDescriptorId);
264 const auto &clusterDescriptor = fDescriptor.GetClusterDescriptor(clusterId);
265 auto selfOffset = clusterDescriptor.GetColumnRange(columnId).fFirstElementIndex;
266 R__ASSERT(selfOffset <= globalIndex);
267 return PopulatePageFromCluster(columnHandle, clusterDescriptor, globalIndex - selfOffset);
268}
269
270
272 ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)
273{
274 auto clusterId = clusterIndex.GetClusterId();
275 auto index = clusterIndex.GetIndex();
276 auto columnId = columnHandle.fId;
277 auto cachedPage = fPagePool->GetPage(columnId, clusterIndex);
278 if (!cachedPage.IsNull())
279 return cachedPage;
280
281 R__ASSERT(clusterId != kInvalidDescriptorId);
282 const auto &clusterDescriptor = fDescriptor.GetClusterDescriptor(clusterId);
283 return PopulatePageFromCluster(columnHandle, clusterDescriptor, index);
284}
285
287{
288 fPagePool->ReturnPage(page);
289}
290
291std::unique_ptr<ROOT::Experimental::Detail::RPageSource> ROOT::Experimental::Detail::RPageSourceRoot::Clone() const
292{
293 return std::make_unique<RPageSourceRoot>(fNTupleName, fFile->GetName(), fOptions);
294}
#define R__WARNING_HERE(GROUP)
Definition: RLogger.hxx:184
#define R__ASSERT(e)
Definition: TError.h:96
#define free
Definition: civetweb.c:1539
#define malloc
Definition: civetweb.c:1536
virtual bool IsMappable() const
Derived, typed classes tell whether the on-storage layout is bitwise identical to the memory layout.
RColumnElementBase * GetElement() const
Definition: RColumn.hxx:230
Uses standard C++ memory allocation for the column data pages.
Adopts the memory returned by TKey->ReadObject()
static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
static void DeletePage(const RPage &page, ROOT::Experimental::Internal::RNTupleBlob *payload)
A closure that can free the memory associated with a mapped page.
A thread-safe cache of column pages.
Definition: RPagePool.hxx:46
void DoCreate(const RNTupleModel &model) final
RClusterDescriptor::RLocator DoCommitCluster(NTupleSize_t nEntries) final
std::unique_ptr< TFile > fFile
Currently, an ntuple is stored as a directory in a TFile.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
RPageSinkRoot(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options)
RClusterDescriptor::RLocator DoCommitPage(ColumnHandle_t columnHandle, const RPage &page) final
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0) final
Get a new, empty page for the given column that can be filled with up to nElements.
Abstract interface to write data into an ntuple.
const RNTupleWriteOptions fOptions
std::unique_ptr< TFile > fFile
Currently, an ntuple is stored as a directory in a TFile.
RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
std::unique_ptr< RPageSource > Clone() const final
Open the same storage multiple times, e.g. for reading in multiple threads.
RPageSourceRoot(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options)
RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType clusterIndex)
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
Abstract interface to read data from an ntuple.
Stores information about the cluster in which this page resides.
Definition: RPage.hxx:46
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
ClusterSize_t::ValueType GetNElements() const
Definition: RPage.hxx:83
void * TryGrow(ClusterSize_t::ValueType nElements)
Return a pointer after the last element that has space for nElements new elements.
Definition: RPage.hxx:107
ClusterSize_t::ValueType GetSize() const
The space taken by column elements in the buffer.
Definition: RPage.hxx:81
Meta-data for a set of ntuple clusters.
const RPageRange & GetPageRange(DescriptorId_t columnId) const
const RColumnRange & GetColumnRange(DescriptorId_t columnId) const
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:83
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3923
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:43
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:75
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:80
static constexpr double pi
NTupleSize_t fFirstElementIndex
A 64bit element index.
Generic information about the physical location of data.
We do not need to store the element size / uncompressed page size because we know to which column the...
RLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...