Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-11-21
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorageFile
17#define ROOT7_RPageStorageFile
18
19#include <ROOT/RMiniFile.hxx>
21#include <ROOT/RNTupleZip.hxx>
22#include <ROOT/RPageStorage.hxx>
23#include <ROOT/RRawFile.hxx>
24#include <ROOT/RStringView.hxx>
25
26#include <array>
27#include <cstdio>
28#include <memory>
29#include <string>
30#include <utility>
31
32class TFile;
33
34namespace ROOT {
35
36namespace Internal {
37class RRawFile;
38}
39
40namespace Experimental {
41namespace Detail {
42
43class RClusterPool;
44class RPageAllocatorHeap;
45class RPagePool;
46
47
48// clang-format off
49/**
50\class ROOT::Experimental::Detail::RPageSinkFile
51\ingroup NTuple
52\brief Storage provider that writes ntuple pages into a file
53
54The written file can be either in ROOT format or in RNTuple bare format.
55*/
56// clang-format on
57class RPageSinkFile : public RPageSink {
58private:
59 std::unique_ptr<RPageAllocatorHeap> fPageAllocator;
60
61 std::unique_ptr<Internal::RNTupleFileWriter> fWriter;
62 /// Byte offset of the first page of the current cluster
63 std::uint64_t fClusterMinOffset = std::uint64_t(-1);
64 /// Byte offset of the end of the last page of the current cluster
65 std::uint64_t fClusterMaxOffset = 0;
66 /// Number of bytes committed to storage in the current cluster
67 std::uint64_t fNBytesCurrentCluster = 0;
68 /// Used to keep the column and field IDs issued during header serialization for the footer serialization
70 RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options);
71
73 std::size_t bytesPacked);
74
75protected:
76 void CreateImpl(const RNTupleModel &model) final;
77 RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
79 std::uint64_t CommitClusterImpl(NTupleSize_t nEntries) final;
80 void CommitDatasetImpl() final;
81
82public:
83 RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options);
84 RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options,
85 std::unique_ptr<TFile> &file);
86 RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
87 RPageSinkFile(const RPageSinkFile&) = delete;
88 RPageSinkFile& operator=(const RPageSinkFile&) = delete;
90 RPageSinkFile& operator=(RPageSinkFile&&) = default;
91 virtual ~RPageSinkFile();
92
93 RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
94 void ReleasePage(RPage &page) final;
95};
96
97
98// clang-format off
99/**
100\class ROOT::Experimental::Detail::RPageAllocatorFile
101\ingroup NTuple
102\brief Manages pages read from a file
103*/
104// clang-format on
106public:
107 static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements);
108 static void DeletePage(const RPage& page);
109};
110
111
112// clang-format off
113/**
114\class ROOT::Experimental::Detail::RPageSourceFile
115\ingroup NTuple
116\brief Storage provider that reads ntuple pages from a file
117*/
118// clang-format on
120public:
121 /// Cannot process pages larger than 1MB
122 static constexpr std::size_t kMaxPageSize = 1024 * 1024;
123
124private:
125 /// Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile
126 std::unique_ptr<RPageAllocatorFile> fPageAllocator;
127 /// The page pool might, at some point, be used by multiple page sources
128 std::shared_ptr<RPagePool> fPagePool;
129 /// The last cluster from which a page got populated. Points into fClusterPool->fPool
130 RCluster *fCurrentCluster = nullptr;
131 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
132 std::unique_ptr<ROOT::Internal::RRawFile> fFile;
133 /// Takes the fFile to read ntuple blobs from it
135 /// The cluster pool asynchronously preloads the next few clusters
136 std::unique_ptr<RClusterPool> fClusterPool;
137
138 RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
139 RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor,
140 ClusterSize_t::ValueType idxInCluster);
141
142 /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
143 /// read requests for a given cluster and columns. The read requests are appended to
144 /// the provided vector. This way, requests can be collected for multiple clusters before
145 /// sending them to RRawFile::ReadV().
146 std::unique_ptr<RCluster> PrepareSingleCluster(
147 const RCluster::RKey &clusterKey,
148 std::vector<ROOT::Internal::RRawFile::RIOVec> &readRequests);
149
150protected:
151 RNTupleDescriptor AttachImpl() final;
152 void UnzipClusterImpl(RCluster *cluster) final;
153
154public:
155 RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
156 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
157 /// The meta-data (header and footer) is reread and parsed by the clone.
158 std::unique_ptr<RPageSource> Clone() const final;
159
161 RPageSourceFile& operator=(const RPageSourceFile&) = delete;
163 RPageSourceFile& operator=(RPageSourceFile&&) = default;
164 virtual ~RPageSourceFile();
165
166 RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final;
167 RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) final;
168 void ReleasePage(RPage &page) final;
169
170 void LoadSealedPage(DescriptorId_t columnId, const RClusterIndex &clusterIndex,
171 RSealedPage &sealedPage) final;
172
173 std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) final;
174};
175
176
177} // namespace Detail
178
179} // namespace Experimental
180} // namespace ROOT
181
182#endif
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:154
Storage provider that writes ntuple pages into a file.
RNTupleLocator CommitSealedPageImpl(DescriptorId_t columnId, const RPageStorage::RSealedPage &sealedPage) final
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final
Get a new, empty page for the given column that can be filled with up to nElements.
std::uint64_t fNBytesCurrentCluster
Number of bytes committed to storage in the current cluster.
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
void CreateImpl(const RNTupleModel &model) final
std::uint64_t CommitClusterImpl(NTupleSize_t nEntries) final
Returns the number of bytes written to storage (excluding metadata)
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::unique_ptr< RPageAllocatorHeap > fPageAllocator
std::uint64_t fClusterMinOffset
Byte offset of the first page of the current cluster.
std::unique_ptr< Internal::RNTupleFileWriter > fWriter
std::uint64_t fClusterMaxOffset
Byte offset of the end of the last page of the current cluster.
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
Internal::RNTupleSerializer::RContext fSerializationContext
Used to keep the column and field IDs issued during header serialization for the footer serialization...
Abstract interface to write data into an ntuple.
Storage provider that reads ntuple pages from a file.
Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
std::unique_ptr< RClusterPool > fClusterPool
The cluster pool asynchronously preloads the next few clusters.
std::shared_ptr< RPagePool > fPagePool
The page pool might, at some point, be used by multiple page sources.
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
std::unique_ptr< RPageAllocatorFile > fPageAllocator
Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile.
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
Read RNTuple data blocks from a TFile container, provided by an RRawFile.
The serialization context is used for the piecewise serialization of a descriptor.
Meta-data for a set of ntuple clusters.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition TFile.h:54
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition file.py:1
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:158
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Generic information about the physical location of data.