Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-11-21
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorageFile
17#define ROOT7_RPageStorageFile
18
19#include <ROOT/RMiniFile.hxx>
21#include <ROOT/RNTupleZip.hxx>
22#include <ROOT/RPageStorage.hxx>
23#include <ROOT/RStringView.hxx>
24
25#include <array>
26#include <cstdio>
27#include <memory>
28#include <string>
29#include <utility>
30
31class TFile;
32
33namespace ROOT {
34
35namespace Internal {
36class RRawFile;
37}
38
39namespace Experimental {
40namespace Detail {
41
42class RClusterPool;
43class RPageAllocatorHeap;
44class RPagePool;
45
46
47// clang-format off
48/**
49\class ROOT::Experimental::Detail::RPageSinkFile
50\ingroup NTuple
51\brief Storage provider that writes ntuple pages into a file
52
53The written file can be either in ROOT format or in RNTuple bare format.
54*/
55// clang-format on
56class RPageSinkFile : public RPageSink {
57public:
 /// Default number of elements per page.
 // NOTE(review): presumably the fallback used by ReservePage() when nElements == 0 -- confirm in the implementation.
58 static constexpr std::size_t kDefaultElementsPerPage = 10000;
59
60private:
 // NOTE(review): original header line 61 is absent from this listing; GetMetrics() below returns an fMetrics
 // member that is not declared in the visible lines of this class.
 /// Heap page allocator owned by this sink.
 // NOTE(review): presumably backs ReservePage() / ReleasePage() -- confirm in the implementation.
62 std::unique_ptr<RPageAllocatorHeap> fPageAllocator;
63
 /// File writer owned by this sink.
 // NOTE(review): presumably performs the actual writes of pages and meta-data -- confirm in the implementation.
64 std::unique_ptr<Internal::RNTupleFileWriter> fWriter;
65 /// Byte offset of the first page of the current cluster
 // Initialized to the largest std::uint64_t value (std::uint64_t(-1) wraps around).
66 std::uint64_t fClusterMinOffset = std::uint64_t(-1);
67 /// Byte offset of the end of the last page of the current cluster
68 std::uint64_t fClusterMaxOffset = 0;
69 /// Helper for zipping keys and header / footer; comprises a 16MB zip buffer
 // NOTE(review): the declaration this comment documents (original header line 70) is absent from this listing;
 // the member index further down the page pairs this description with `RNTupleCompressor fCompressor`.
71
72protected:
 // Implementation hooks, each marked final here.
 // NOTE(review): presumably overrides of RPageSink virtuals (the base class is not shown here) -- confirm.
73 void CreateImpl(const RNTupleModel &model) final;
74 RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
 // NOTE(review): original header line 75 is absent from this listing; the member index further down the page
 // lists `RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries) final`.
76 void CommitDatasetImpl() final;
77
78public:
 // Three constructor overloads, differing in how the target is given: a path, a path together with a reference
 // to a caller-held std::unique_ptr<TFile>, or a reference to an existing TFile.
 // NOTE(review): how ownership of `file` is treated in the last two overloads is not visible here -- confirm.
79 RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options);
80 RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options,
81 std::unique_ptr<TFile> &file);
82 RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
 // Copying is disabled; move assignment is defaulted.
83 RPageSinkFile(const RPageSinkFile&) = delete;
84 RPageSinkFile& operator=(const RPageSinkFile&) = delete;
 // NOTE(review): original header line 85 is absent from this listing (possibly the move constructor) -- confirm.
86 RPageSinkFile& operator=(RPageSinkFile&&) = default;
87 virtual ~RPageSinkFile();
88
 /// Get a new, empty page for the given column that can be filled with up to nElements.
89 RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements = 0) final;
 /// Every page store needs to be able to free pages it handed out.
90 void ReleasePage(RPage &page) final;
91
 /// Returns a reference to the fMetrics member.
92 RNTupleMetrics &GetMetrics() final { return fMetrics; }
93};
94
95
96// clang-format off
97/**
98\class ROOT::Experimental::Detail::RPageAllocatorFile
99\ingroup NTuple
100\brief Manages pages read from the file
101*/
102// clang-format on
104public:
105 static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements);
106 static void DeletePage(const RPage& page);
107};
108
109
110// clang-format off
111/**
112\class ROOT::Experimental::Detail::RPageSourceFile
113\ingroup NTuple
114\brief Storage provider that reads ntuple pages from a file
115*/
116// clang-format on
118public:
119 /// Cannot process pages larger than 1MB
120 static constexpr std::size_t kMaxPageSize = 1024 * 1024;
121
122private:
123 /// I/O performance counters that get registered in fMetrics
124 struct RCounters {
142 };
143 std::unique_ptr<RCounters> fCounters;
144 /// Wraps the I/O counters and is observed by the RNTupleReader metrics
146
147 /// Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile
148 std::unique_ptr<RPageAllocatorFile> fPageAllocator;
149 /// The page pool might, at some point, be used by multiple page sources
150 std::shared_ptr<RPagePool> fPagePool;
151 /// The last cluster from which a page got populated. Points into fClusterPool->fPool
153 /// Helper to unzip pages and header/footer; comprises a 16MB unzip buffer
155 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
156 std::unique_ptr<ROOT::Internal::RRawFile> fFile;
157 /// Takes the fFile to read ntuple blobs from it
159 /// The cluster pool asynchronously preloads the next few clusters
160 std::unique_ptr<RClusterPool> fClusterPool;
161
162 RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
163 RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor,
164 ClusterSize_t::ValueType idxInCluster);
165
166protected:
168 void UnzipClusterImpl(RCluster *cluster) final;
169
170public:
171 RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
172 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
173 /// The meta-data (header and footer) is reread and parsed by the clone.
174 std::unique_ptr<RPageSource> Clone() const final;
175
177 RPageSourceFile& operator=(const RPageSourceFile&) = delete;
179 RPageSourceFile& operator=(RPageSourceFile&&) = default;
180 virtual ~RPageSourceFile();
181
182 RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final;
183 RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) final;
184 void ReleasePage(RPage &page) final;
185
186 std::unique_ptr<RCluster> LoadCluster(DescriptorId_t clusterId, const ColumnSet_t &columns) final;
187
188 RNTupleMetrics &GetMetrics() final { return fMetrics; }
189};
190
191
192} // namespace Detail
193
194} // namespace Experimental
195} // namespace ROOT
196
197#endif
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:154
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
Helper class to compress data blocks in the ROOT compression frame format.
Helper class to uncompress data blocks in the ROOT compression frame format.
A collection of Counter objects with a name, a unit, and a description.
An either thread-safe or non thread safe counter for CPU ticks.
static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
Storage provider that writes ntuple pages into a file.
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0) final
Get a new, empty page for the given column that can be filled with up to nElements.
static constexpr std::size_t kDefaultElementsPerPage
void CreateImpl(const RNTupleModel &model) final
RNTupleMetrics & GetMetrics() final
Returns an empty metrics object. Page storage implementations usually have their own metrics.
RNTupleCompressor fCompressor
Helper for zipping keys and header / footer; comprises a 16MB zip buffer.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::unique_ptr< RPageAllocatorHeap > fPageAllocator
std::uint64_t fClusterMinOffset
Byte offset of the first page of the current cluster.
std::unique_ptr< Internal::RNTupleFileWriter > fWriter
std::uint64_t fClusterMaxOffset
Byte offset of the end of the last page of the current cluster.
RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries) final
Abstract interface to write data into an ntuple.
Storage provider that reads ntuple pages from a file.
RNTupleDecompressor fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB unzip buffer.
RCluster * fCurrentCluster
The last cluster from which a page got populated. Points into fClusterPool->fPool.
RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType idxInCluster)
Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
std::unique_ptr< RClusterPool > fClusterPool
The cluster pool asynchronously preloads the next few clusters.
std::unique_ptr< RCluster > LoadCluster(DescriptorId_t clusterId, const ColumnSet_t &columns) final
Populates all the pages of the given cluster id and columns; it is possible that some columns do not ...
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
static constexpr std::size_t kMaxPageSize
Cannot process pages larger than 1MB.
std::unique_ptr< RPageSource > Clone() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
void UnzipClusterImpl(RCluster *cluster) final
std::shared_ptr< RPagePool > fPagePool
The page pool might, at some point, be used by multiple page sources.
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
RNTupleMetrics & GetMetrics() final
Returns an empty metrics object. Page storage implementations usually have their own metrics.
std::unique_ptr< RPageAllocatorFile > fPageAllocator
Populated pages might be shared; their memory buffer is managed by the RPageAllocatorFile.
RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
RNTupleMetrics fMetrics
Wraps the I/O counters and is observed by the RNTupleReader metrics.
Abstract interface to read data from an ntuple.
std::unordered_set< DescriptorId_t > ColumnSet_t
Derived from the model (fields) that are actually being requested at a given point in time.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Meta-data for a set of ntuple clusters.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition TFile.h:54
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition file.py:1
I/O performance counters that get registered in fMetrics.
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuRead
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuUnzip
Generic information about the physical location of data.