Logo ROOT  
Reference Guide
RPageStorageFile.cxx
Go to the documentation of this file.
1/// \file RPageStorageFile.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-11-25
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RField.hxx>
17#include <ROOT/RLogger.hxx>
19#include <ROOT/RNTupleModel.hxx>
20#include <ROOT/RNTupleZip.hxx>
21#include <ROOT/RPage.hxx>
23#include <ROOT/RPagePool.hxx>
25#include <ROOT/RRawFile.hxx>
26
27#include <RVersion.h>
28#include <TError.h>
29
30#include <algorithm>
31#include <cstdio>
32#include <cstdlib>
33#include <iostream>
34#include <utility>
35
36
38 const RNTupleWriteOptions &options)
39 : RPageSink(ntupleName, options)
40 , fMetrics("RPageSinkRoot")
41 , fPageAllocator(std::make_unique<RPageAllocatorHeap>())
42{
 // NOTE(review): the opening line of this signature (listing line 37) is absent from this extract.
 // Judging by the names used below (ntupleName, path, options) this is presumably the
 // RPageSinkFile(ntupleName, path, options) constructor -- confirm against the header.
 //
 // Emit a warning that the on-disk format is not yet stable.
43 R__WARNING_HERE("NTuple") << "The RNTuple file format will change. " <<
44 "Do not store real data with this version of RNTuple!";
45
 // Recreate() returns a raw RNTupleFileWriter pointer; storing it in the unique_ptr member fWriter
 // makes this sink its owner. Compression setting and container format come from the write options.
46 fWriter = std::unique_ptr<Internal::RNTupleFileWriter>(Internal::RNTupleFileWriter::Recreate(
47 ntupleName, path, options.GetCompression(), options.GetContainerFormat()));
48}
49
50
52 const RNTupleWriteOptions &options)
53 : RPageSink(ntupleName, options)
54 , fMetrics("RPageSinkRoot")
55 , fPageAllocator(std::make_unique<RPageAllocatorHeap>())
56{
 // NOTE(review): the opening line of this signature (listing line 51) is absent from this extract.
 // The body passes `file` to RNTupleFileWriter::Append(), so this is presumably the constructor
 // overload that writes into an already existing TFile -- confirm against the header.
 //
 // Emit a warning that the on-disk format is not yet stable.
57 R__WARNING_HERE("NTuple") << "The RNTuple file format will change. " <<
58 "Do not store real data with this version of RNTuple!";
59
 // Append() returns a raw writer pointer; fWriter (a unique_ptr) takes ownership of it.
60 fWriter = std::unique_ptr<Internal::RNTupleFileWriter>(Internal::RNTupleFileWriter::Append(ntupleName, file));
61}
62
63
65 const RNTupleWriteOptions &options, std::unique_ptr<TFile> &file)
66 : RPageSink(ntupleName, options)
67 , fMetrics("RPageSinkRoot")
68 , fPageAllocator(std::make_unique<RPageAllocatorHeap>())
69{
 // NOTE(review): listing lines 64 (start of the signature) and 73 (the argument that completes the
 // fWriter assignment below) are absent from this extract, so the statement at the end of this body
 // is truncated here. This overload receives the TFile through a std::unique_ptr<TFile> reference;
 // what it does with that pointer cannot be determined from the visible lines.
 //
 // Emit a warning that the on-disk format is not yet stable.
70 R__WARNING_HERE("NTuple") << "The RNTuple file format will change. " <<
71 "Do not store real data with this version of RNTuple!";
72 fWriter = std::unique_ptr<Internal::RNTupleFileWriter>(
74}
75
76
78{
 // Intentionally empty body.
 // NOTE(review): the signature (listing line 77) is absent from this extract; given its position
 // after the RPageSinkFile constructors this is presumably the RPageSinkFile destructor -- confirm.
79}
80
81
83{
 // Serializes the ntuple header from the descriptor, compresses it and hands it to the file writer.
 // NOTE(review): the signature (listing line 82) is absent from this extract; presumably this is
 // RPageSinkFile::CreateImpl(const RNTupleModel &) -- confirm against the header.
84 const auto &descriptor = fDescriptorBuilder.GetDescriptor();
 // The first SerializeHeader() call (nullptr target) is used only for its return value, which sizes
 // the buffer; the second call fills that buffer.
85 auto szHeader = descriptor.SerializeHeader(nullptr);
86 auto buffer = std::unique_ptr<unsigned char[]>(new unsigned char[szHeader]);
87 descriptor.SerializeHeader(buffer.get());
88
 // The compressor delivers its output through the callback, which copies n bytes from b to offset o
 // of zipBuffer.
 // NOTE(review): zipBuffer is allocated with the uncompressed size; this relies on the compressor
 // never emitting more than szHeader bytes -- confirm.
89 auto zipBuffer = std::unique_ptr<unsigned char[]>(new unsigned char[szHeader]);
90 auto szZipHeader = fCompressor(buffer.get(), szHeader, fOptions.GetCompression(),
91 [&zipBuffer](const void *b, size_t n, size_t o){ memcpy(zipBuffer.get() + o, b, n); } );
 // Both the compressed and the uncompressed length are passed along with the compressed bytes.
92 fWriter->WriteNTupleHeader(zipBuffer.get(), szZipHeader, szHeader);
93}
94
95
98{
 // Writes one page to the file: optionally packs it, optionally compresses it, stores it as a blob
 // and returns where it was written.
 // NOTE(review): the signature (listing lines 96-97) and the declaration of `result` (listing
 // line 128) are absent from this extract; the body uses `columnHandle` and `page`, matching
 // CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page).
 //
 // Start out pointing directly at the page's own memory. isAdoptedBuffer == true means `buffer`
 // is not owned by this function and must not be deleted here.
99 unsigned char *buffer = reinterpret_cast<unsigned char *>(page.GetBuffer());
100 bool isAdoptedBuffer = true;
101 auto packedBytes = page.GetSize();
102 auto element = columnHandle.fColumn->GetElement();
103 const auto isMappable = element->IsMappable();
104
 // Non-mappable elements are packed into a temporary heap buffer whose size is the element count
 // times the on-storage bit width, rounded up to whole bytes. This buffer is owned locally.
105 if (!isMappable) {
106 packedBytes = (page.GetNElements() * element->GetBitsOnStorage() + 7) / 8;
107 buffer = new unsigned char[packedBytes];
108 isAdoptedBuffer = false;
109 element->Pack(buffer, page.GetBuffer(), page.GetNElements());
110 }
 // Without compression the stored size equals the packed size.
111 auto zippedBytes = packedBytes;
112
 // With compression enabled, compress the (possibly packed) bytes, release the temporary pack
 // buffer if one was allocated, and continue with the buffer returned by fCompressor.GetZipBuffer().
 // That buffer is treated as not owned here (isAdoptedBuffer is set back to true).
113 if (fOptions.GetCompression() != 0) {
114 zippedBytes = fCompressor(buffer, packedBytes, fOptions.GetCompression());
115 if (!isAdoptedBuffer)
116 delete[] buffer;
117 buffer = const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(fCompressor.GetZipBuffer()));
118 isAdoptedBuffer = true;
119 }
120
 // Store the blob and widen the [min, max] range of blob offsets seen for the current cluster.
 // Note that fClusterMaxOffset records the offset at which a blob starts, not where it ends.
121 auto offsetData = fWriter->WriteBlob(buffer, zippedBytes, packedBytes);
122 fClusterMinOffset = std::min(offsetData, fClusterMinOffset);
123 fClusterMaxOffset = std::max(offsetData, fClusterMaxOffset);
124
 // Only reached with an owned buffer when the page was packed but not compressed.
125 if (!isAdoptedBuffer)
126 delete[] buffer;
127
 // The returned locator carries the blob offset and the number of bytes written to storage.
129 result.fPosition = offsetData;
130 result.fBytesOnStorage = zippedBytes;
131 return result;
132}
133
134
137{
 // Reports the file range covered by the pages of the cluster being committed and resets the
 // offset tracking for the next cluster.
 // NOTE(review): the signature (listing lines 135-136) and the declaration of `result` (listing
 // line 138) are absent from this extract; presumably this is CommitClusterImpl(NTupleSize_t).
 // NOTE(review): fClusterMaxOffset is updated with blob *start* offsets (see the std::max call in
 // the page-commit code), so the difference below does not include the length of the blob that
 // starts at the maximum offset -- confirm that this is the intended meaning of fBytesOnStorage.
139 result.fPosition = fClusterMinOffset;
140 result.fBytesOnStorage = fClusterMaxOffset - fClusterMinOffset;
 // Reset: the minimum goes to the largest 64-bit value and the maximum to 0, so that the next
 // std::min / std::max updates take the first offset they see.
141 fClusterMinOffset = std::uint64_t(-1);
142 fClusterMaxOffset = 0;
143 return result;
144}
145
146
148{
 // Serializes the ntuple footer from the descriptor, compresses it, writes it and commits the writer.
 // NOTE(review): the signature (listing line 147) is absent from this extract; presumably this is
 // RPageSinkFile::CommitDatasetImpl() -- confirm against the header.
149 const auto &descriptor = fDescriptorBuilder.GetDescriptor();
 // The first SerializeFooter() call (nullptr target) is used only for its return value, which sizes
 // the buffer; the second call fills that buffer.
150 auto szFooter = descriptor.SerializeFooter(nullptr);
151 auto buffer = std::unique_ptr<unsigned char []>(new unsigned char[szFooter]);
152 descriptor.SerializeFooter(buffer.get());
153
 // The compressor delivers its output through the callback, which copies n bytes from b to offset o
 // of zipBuffer.
 // NOTE(review): zipBuffer is allocated with the uncompressed size; this relies on the compressor
 // never emitting more than szFooter bytes -- confirm.
154 auto zipBuffer = std::unique_ptr<unsigned char[]>(new unsigned char[szFooter]);
155 auto szZipFooter = fCompressor(buffer.get(), szFooter, fOptions.GetCompression(),
156 [&zipBuffer](const void *b, size_t n, size_t o){ memcpy(zipBuffer.get() + o, b, n); } );
 // Write the footer with its compressed and uncompressed lengths, then finalize via Commit().
157 fWriter->WriteNTupleFooter(zipBuffer.get(), szZipFooter, szFooter);
158 fWriter->Commit();
159}
160
161
164{
 // Hands out a new page for the given column from this sink's page allocator.
 // NOTE(review): the signature (listing lines 162-163) is absent from this extract; the body uses
 // `columnHandle` and `nElements`, matching ReservePage(ColumnHandle_t, std::size_t nElements = 0).
 //
 // A request for 0 elements means "use the default page capacity".
165 if (nElements == 0)
166 nElements = kDefaultElementsPerPage;
 // The per-element size is taken from the column's element description.
167 auto elementSize = columnHandle.fColumn->GetElement()->GetSize();
168 return fPageAllocator->NewPage(columnHandle.fId, elementSize, nElements);
169}
170
172{
 // Returns a page to the allocator it came from.
 // NOTE(review): the signature (listing line 171) is absent from this extract; given its position
 // this is presumably RPageSinkFile::ReleasePage(RPage &page) -- confirm.
173 fPageAllocator->DeletePage(page);
174}
175
176
177////////////////////////////////////////////////////////////////////////////////
178
179
181 ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
182{
 // Wraps caller-provided memory `mem` in an RPage for column `columnId`; no allocation happens here.
 // NOTE(review): the opening line of this signature (listing line 180) is absent from this extract;
 // the parameter list matches the static NewPage(columnId, mem, elementSize, nElements) of the
 // file page allocator.
 //
 // The page is created with a byte size of elementSize * nElements.
183 RPage newPage(columnId, mem, elementSize * nElements, elementSize);
 // TryGrow() is called for its effect on the page; the pointer it returns is discarded.
 // Presumably this marks all nElements slots as occupied -- confirm against RPage::TryGrow.
184 newPage.TryGrow(nElements);
185 return newPage;
186}
187
189{
 // Frees the memory behind a page; a null page is ignored.
 // NOTE(review): the signature (listing line 188) is absent from this extract; presumably this is
 // the file page allocator's DeletePage(const RPage &page) -- confirm.
190 if (page.IsNull())
191 return;
 // The buffer is released with delete[] as an unsigned char array, so it must have been allocated
 // with new unsigned char[...].
192 delete[] reinterpret_cast<unsigned char *>(page.GetBuffer());
193}
194
195
196////////////////////////////////////////////////////////////////////////////////
197
198
200 const RNTupleReadOptions &options)
201 : RPageSource(ntupleName, options)
202 , fMetrics("RPageSourceFile")
203 , fPageAllocator(std::make_unique<RPageAllocatorFile>())
204 , fPagePool(std::make_shared<RPagePool>())
205{
 // Only the member initializers above do any work: base class, metrics, page allocator and page
 // pool. No file or reader is set up by this constructor.
 // NOTE(review): the opening line of this signature (listing line 199) is absent from this extract;
 // the initializers match RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &).
206}
207
208
210 const RNTupleReadOptions &options)
211 : RPageSourceFile(ntupleName, options)
212{
 // Delegates the common member setup to the (ntupleName, options) constructor.
 // NOTE(review): listing line 209 (start of the signature) and lines 213-215 (the statements of
 // this body) are absent from this extract, so what this overload does beyond delegating cannot
 // be determined here.
216}
217
218
220{
 // Intentionally empty body.
 // NOTE(review): the signature (listing line 219) is absent from this extract; given its position
 // after the RPageSourceFile constructors this is presumably the RPageSourceFile destructor -- confirm.
221}
222
223
225{
 // Builds the ntuple descriptor by reading and decompressing the header and the footer from the file.
 // NOTE(review): the signature (listing line 224) is absent from this extract; presumably this is
 // RPageSourceFile::AttachImpl() -- confirm against the header.
226 RNTupleDescriptorBuilder descBuilder;
 // Despite the member-style `f` prefix, fNTuple is a local holding the anchor record looked up by name.
227 const auto fNTuple = fReader.GetNTuple(fNTupleName);
228
 // Header: `buffer` is sized to the uncompressed length (fLenHeader), `zipBuffer` to the stored
 // length (fNBytesHeader). Read the stored bytes, decompress them into `buffer`, then parse.
229 auto buffer = std::unique_ptr<unsigned char[]>(new unsigned char[fNTuple.fLenHeader]);
230 auto zipBuffer = std::unique_ptr<unsigned char[]>(new unsigned char[fNTuple.fNBytesHeader]);
231 fReader.ReadBuffer(zipBuffer.get(), fNTuple.fNBytesHeader, fNTuple.fSeekHeader);
232 fDecompressor(zipBuffer.get(), fNTuple.fNBytesHeader, fNTuple.fLenHeader, buffer.get());
233 descBuilder.SetFromHeader(buffer.get());
234
 // Footer: same procedure with freshly allocated buffers; reassigning the unique_ptrs releases
 // the header buffers.
235 buffer = std::unique_ptr<unsigned char[]>(new unsigned char[fNTuple.fLenFooter]);
236 zipBuffer = std::unique_ptr<unsigned char[]>(new unsigned char[fNTuple.fNBytesFooter]);
237 fReader.ReadBuffer(zipBuffer.get(), fNTuple.fNBytesFooter, fNTuple.fSeekFooter);
238 fDecompressor(zipBuffer.get(), fNTuple.fNBytesFooter, fNTuple.fLenFooter, buffer.get());
239 descBuilder.AddClustersFromFooter(buffer.get());
240
 // Hand the finished descriptor to the caller.
241 return descBuilder.MoveDescriptor();
242}
243
244
246 ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType clusterIndex)
247{
 // Loads the page of the given column that contains element `clusterIndex` (an index relative to
 // the start of the cluster), registers it with the page pool and returns it.
 // NOTE(review): listing lines 245 (start of the signature), 253 (declaration of `pageInfo`) and
 // 293 (body of the deleter lambda) are absent from this extract.
248 const auto columnId = columnHandle.fId;
249 const auto clusterId = clusterDescriptor.GetId();
250 const auto &pageRange = clusterDescriptor.GetPageRange(columnId);
251
 // Linear scan over the column's pages in this cluster, accumulating element counts until the
 // page covering clusterIndex is found. After the loop, firstInPage is the cluster-relative index
 // of that page's first element.
252 // TODO(jblomer): binary search
254 decltype(clusterIndex) firstInPage = 0;
255 for (const auto &pi : pageRange.fPageInfos) {
256 if (firstInPage + pi.fNElements > clusterIndex) {
257 pageInfo = pi;
258 break;
259 }
260 firstInPage += pi.fNElements;
261 }
 // Sanity checks: the selected page must actually contain clusterIndex.
262 R__ASSERT(firstInPage <= clusterIndex);
263 R__ASSERT((firstInPage + pageInfo.fNElements) > clusterIndex);
264
265 const auto element = columnHandle.fColumn->GetElement();
266 const auto elementSize = element->GetSize();
267
 // The read buffer is sized to the larger of the stored size and the in-memory size
 // (elementSize * fNElements).
 // NOTE(review): the in-memory size is narrowed to std::uint32_t for the comparison -- confirm
 // that page sizes cannot exceed that range.
268 auto pageSize = pageInfo.fLocator.fBytesOnStorage;
269 auto pageBuffer = new unsigned char[
270 std::max(pageSize, static_cast<std::uint32_t>(elementSize * pageInfo.fNElements))];
271 fReader.ReadBuffer(pageBuffer, pageInfo.fLocator.fPosition == pageInfo.fLocator.fPosition ? pageInfo.fLocator.fBytesOnStorage : pageInfo.fLocator.fBytesOnStorage, pageInfo.fLocator.fPosition);
272
 // A stored size different from the packed size (element count times on-storage bit width,
 // rounded up to bytes) is taken to mean the page is compressed. The three-argument decompressor
 // call names no separate target; presumably it decompresses in place -- confirm.
273 const auto bytesOnStorage = (element->GetBitsOnStorage() * pageInfo.fNElements + 7) / 8;
274 if (pageSize != bytesOnStorage) {
275 fDecompressor(pageBuffer, pageSize, bytesOnStorage);
276 pageSize = bytesOnStorage;
277 }
278
 // Non-mappable elements are unpacked into a second buffer of the in-memory size; the packed
 // buffer is freed and replaced by the unpacked one.
279 if (!element->IsMappable()) {
280 pageSize = elementSize * pageInfo.fNElements;
281 auto unpackedBuffer = new unsigned char[pageSize];
282 element->Unpack(unpackedBuffer, pageBuffer, pageInfo.fNElements);
283 delete[] pageBuffer;
284 pageBuffer = unpackedBuffer;
285 }
286
 // Wrap the buffer in an RPage and set its window: the global index of its first element is the
 // column's first element index in this cluster plus firstInPage.
287 const auto indexOffset = clusterDescriptor.GetColumnRange(columnId).fFirstElementIndex;
288 auto newPage = fPageAllocator->NewPage(columnId, pageBuffer, elementSize, pageInfo.fNElements);
289 newPage.SetWindow(indexOffset + firstInPage, RPage::RClusterInfo(clusterId, indexOffset));
 // Register the page with the pool together with a deleter closure (no user data is passed).
290 fPagePool->RegisterPage(newPage,
291 RPageDeleter([](const RPage &page, void * /*userData*/)
292 {
294 }, nullptr));
295 return newPage;
296}
297
298
300 ColumnHandle_t columnHandle, NTupleSize_t globalIndex)
301{
 // Returns the page of the given column that contains the element at `globalIndex`.
 // NOTE(review): the opening line of this signature (listing line 299) is absent from this extract;
 // the parameters match PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex).
 //
 // Serve the request from the page pool when the page is already cached.
302 const auto columnId = columnHandle.fId;
303 auto cachedPage = fPagePool->GetPage(columnId, globalIndex);
304 if (!cachedPage.IsNull())
305 return cachedPage;
306
 // Otherwise locate the cluster that holds globalIndex and translate the global index into a
 // cluster-relative one by subtracting the column's first element index in that cluster.
307 const auto clusterId = fDescriptor.FindClusterId(columnId, globalIndex);
308 R__ASSERT(clusterId != kInvalidDescriptorId);
309 const auto &clusterDescriptor = fDescriptor.GetClusterDescriptor(clusterId);
310 const auto selfOffset = clusterDescriptor.GetColumnRange(columnId).fFirstElementIndex;
311 R__ASSERT(selfOffset <= globalIndex);
312 return PopulatePageFromCluster(columnHandle, clusterDescriptor, globalIndex - selfOffset);
313}
314
315
317 ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)
318{
 // Returns the page of the given column that contains the element addressed by a
 // (cluster id, index within cluster) pair.
 // NOTE(review): the opening line of this signature (listing line 316) is absent from this extract;
 // presumably this is the PopulatePage overload taking an RClusterIndex -- confirm.
319 const auto clusterId = clusterIndex.GetClusterId();
320 const auto index = clusterIndex.GetIndex();
321 const auto columnId = columnHandle.fId;
 // Serve the request from the page pool when the page is already cached.
322 auto cachedPage = fPagePool->GetPage(columnId, clusterIndex);
323 if (!cachedPage.IsNull())
324 return cachedPage;
325
 // The cluster id is validated only on the cache-miss path; the index is already cluster-relative
 // and is passed through unchanged.
326 R__ASSERT(clusterId != kInvalidDescriptorId);
327 const auto &clusterDescriptor = fDescriptor.GetClusterDescriptor(clusterId);
328 return PopulatePageFromCluster(columnHandle, clusterDescriptor, index);
329}
330
332{
 // Gives the page back to the page pool rather than freeing it directly.
 // NOTE(review): the signature (listing line 331) is absent from this extract; given its position
 // this is presumably RPageSourceFile::ReleasePage(RPage &page) -- confirm.
333 fPagePool->ReturnPage(page);
334}
335
/// Creates a second page source for the same ntuple name and read options. The clone gets its own
/// raw file object (obtained from fFile->Clone()) and its own reader on top of that file.
/// \return The new page source, owned by the caller through the returned unique_ptr.
336std::unique_ptr<ROOT::Experimental::Detail::RPageSource> ROOT::Experimental::Detail::RPageSourceFile::Clone() const
337{
 // NOTE(review): the new object is held by a raw pointer until the final return statement. If
 // fFile->Clone() or the RMiniFileReader construction throws, it is leaked; consider holding it
 // in a std::unique_ptr<RPageSourceFile> from the start.
338 auto clone = new RPageSourceFile(fNTupleName, fOptions);
339 clone->fFile = fFile->Clone();
 // The reader is given a non-owning pointer to the clone's own file.
340 clone->fReader = Internal::RMiniFileReader(clone->fFile.get());
341 return std::unique_ptr<RPageSourceFile>(clone);
342}
#define R__WARNING_HERE(GROUP)
Definition: RLogger.hxx:184
#define b(i)
Definition: RSha256.hxx:100
#define R__ASSERT(e)
Definition: TError.h:96
virtual bool IsMappable() const
Derived, typed classes tell whether the on-storage layout is bitwise identical to the memory layout.
RColumnElementBase * GetElement() const
Definition: RColumn.hxx:230
Manages pages read from the file.
static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
Uses standard C++ memory allocation for the column data pages.
A closure that can free the memory associated with a mapped page.
A thread-safe cache of column pages.
Definition: RPagePool.hxx:46
RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options)
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0) final
Get a new, empty page for the given column that can be filled with up to nElements.
void CreateImpl(const RNTupleModel &model) final
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::unique_ptr< Internal::RNTupleFileWriter > fWriter
RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries) final
Abstract interface to write data into an ntuple.
Storage provider that reads ntuple pages from a file.
Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::unique_ptr< RPageSource > Clone() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options)
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType clusterIndex)
RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
Abstract interface to read data from an ntuple.
Stores information about the cluster in which this page resides.
Definition: RPage.hxx:46
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
ClusterSize_t::ValueType GetNElements() const
Definition: RPage.hxx:83
void * TryGrow(ClusterSize_t::ValueType nElements)
Return a pointer after the last element that has space for nElements new elements.
Definition: RPage.hxx:107
ClusterSize_t::ValueType GetSize() const
The space taken by column elements in the buffer.
Definition: RPage.hxx:81
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition: RMiniFile.hxx:101
static RNTupleFileWriter * Append(std::string_view ntupleName, TFile &file)
Add a new RNTuple identified by ntupleName to the existing TFile.
Definition: RMiniFile.cxx:1137
static RNTupleFileWriter * Recreate(std::string_view ntupleName, std::string_view path, int defaultCompression, ENTupleContainerFormat containerFormat)
Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
Definition: RMiniFile.cxx:1094
Meta-data for a set of ntuple clusters.
const RPageRange & GetPageRange(DescriptorId_t columnId) const
const RColumnRange & GetColumnRange(DescriptorId_t columnId) const
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:82
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
ENTupleContainerFormat GetContainerFormat() const
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Definition: RRawFile.cxx:73
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:53
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:42
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:74
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:79
static constexpr double pi
Definition: file.py:1
NTupleSize_t fFirstElementIndex
A 64bit element index.
Generic information about the physical location of data.
We do not need to store the element size / uncompressed page size because we know to which column the...
RLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...