Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \date 2019-11-21
4
5/*************************************************************************
6 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12
13#ifndef ROOT_RPageStorageFile
14#define ROOT_RPageStorageFile
15
16#include <ROOT/RMiniFile.hxx>
17#include <ROOT/RNTuple.hxx>
19#include <ROOT/RNTupleZip.hxx>
20#include <ROOT/RPageStorage.hxx>
21#include <ROOT/RRawFile.hxx>
22#include <string_view>
23
24#include <array>
25#include <cstdio>
26#include <memory>
27#include <optional>
28#include <string>
29#include <utility>
30
31class TDirectory;
32
33namespace ROOT {
34class RNTuple; // for making RPageSourceFile a friend of RNTuple
35class RNTupleLocator;
36
37namespace Experimental {
38class RFile;
39}
40
41namespace Internal {
42class RRawFile;
44
45// clang-format off
46/**
47\class ROOT::Internal::RPageSinkFile
48\ingroup NTuple
49\brief Storage provider that writes ntuple pages into a file
50
51The written file can be either in ROOT format or in RNTuple bare format.
52*/
53// clang-format on
55private:
56 // A set of pages to be committed together in a vector write.
57 // Currently we assume they're all sequential (although they may span multiple ranges).
58 struct CommitBatch {
59 /// The list of pages to commit
60 std::vector<const RSealedPage *> fSealedPages;
61 /// Total size in bytes of the batch
62 size_t fSize;
63 /// Total uncompressed size of the elements in the page batch
65 };
66
67 std::unique_ptr<ROOT::Internal::RNTupleFileWriter> fWriter;
68 /// Number of bytes committed to storage in the current cluster
69 std::uint64_t fNBytesCurrentCluster = 0;
70 /// On UpdateSchema(), the new class fields register the corresponding streamer info here so that the
71 /// streamer info records in the file can be properly updated on dataset commit
73
74 RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options);
75 RPageSinkFile(std::unique_ptr<ROOT::Internal::RNTupleFileWriter> writer, const ROOT::RNTupleWriteOptions &options);
76
77 /// We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the corresponding
78 /// key. It is not strictly necessary to write and read the sealed page.
79 RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked);
80
81 /// Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages
82 /// contained in `batch`. The locators for the written pages are appended to `locators`.
83 /// This procedure also updates some internal metrics of the page sink, hence it's not const.
84 /// `batch` gets reset to size 0 after the writing is done (but its begin and end are not updated).
85 void CommitBatchOfPages(CommitBatch &batch, std::vector<RNTupleLocator> &locators);
86
87protected:
89 void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final;
90 RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override;
92 CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final;
93 std::vector<RNTupleLocator>
94 CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges, const std::vector<bool> &mask) final;
95 std::uint64_t StageClusterImpl() final;
96 RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final;
98 RNTupleLink CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
99
100public:
101 RPageSinkFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleWriteOptions &options);
102 RPageSinkFile(std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options);
103 RPageSinkFile(std::string_view ntupleName, ROOT::Experimental::RFile &file, std::string_view ntupleDir,
104 const ROOT::RNTupleWriteOptions &options);
105 RPageSinkFile(const RPageSinkFile &) = delete;
106 RPageSinkFile &operator=(const RPageSinkFile &) = delete;
108 RPageSinkFile &operator=(RPageSinkFile &&) = default;
109 ~RPageSinkFile() override;
110
111 void UpdateSchema(const ROOT::Internal::RNTupleModelChangeset &changeset, ROOT::NTupleSize_t firstEntry) final;
112
114 CloneAsHidden(std::string_view name, const ROOT::RNTupleWriteOptions &opts) const override;
115}; // class RPageSinkFile
116
117// clang-format off
118/**
119\class ROOT::Internal::RPageSourceFile
120\ingroup NTuple
121\brief Storage provider that reads ntuple pages from a file
122*/
123// clang-format on
125 friend class ROOT::RNTuple;
126
127private:
128 /// Holds the uncompressed header and footer
130 std::unique_ptr<unsigned char[]> fBuffer; ///< single buffer for both header and footer
131 void *fPtrHeader = nullptr; ///< either nullptr or points into fBuffer
132 void *fPtrFooter = nullptr; ///< either nullptr or points into fBuffer
133
134 /// Called at the end of Attach(), i.e. when the header and footer are processed
135 void Reset()
136 {
137 RStructureBuffer empty;
138 std::swap(empty, *this);
139 }
140 };
141
142 /// Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name
143 std::optional<RNTuple> fAnchor;
144 /// The last cluster from which a page got loaded. Points into fClusterPool->fPool
146 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
147 std::unique_ptr<RRawFile> fFile;
148 /// Takes the fFile to read ntuple blobs from it
150 /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
152 /// Populated by LoadStructureImpl(), reset at the end of Attach()
154 /// Tracks the last read offset for seek distance calculation
155 std::uint64_t fLastOffset = 0;
156
157 /// File-specific I/O performance counters
164 std::unique_ptr<RFileCounters> fFileCounters;
165 /// Total file size, set once in AttachImpl()
166 std::int64_t fFileSize = 0;
167
168 RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &options);
169
170 /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
171 /// read requests for a given cluster and columns. The read requests are appended to
172 /// the provided vector. This way, requests can be collected for multiple clusters before
173 /// sending them to RRawFile::ReadV().
174 std::unique_ptr<ROOT::Internal::RCluster>
175 PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector<RRawFile::RIOVec> &readRequests);
176
177protected:
178 void LoadStructureImpl() final;
179 ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
180 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
181 std::unique_ptr<RPageSource> CloneImpl() const final;
182
184 LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final;
185
186public:
187 RPageSourceFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleReadOptions &options);
188 RPageSourceFile(std::string_view ntupleName, std::unique_ptr<RRawFile> file,
189 const ROOT::RNTupleReadOptions &options);
190 /// Used from the RNTuple class to build a datasource if the anchor is already available.
191 /// Requires the RNTuple object to be streamed from a file.
192 static std::unique_ptr<RPageSourceFile>
193 CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
194
196 RPageSourceFile &operator=(const RPageSourceFile &) = delete;
198 RPageSourceFile &operator=(RPageSourceFile &&) = delete;
199 ~RPageSourceFile() override;
200
202 const ROOT::RNTupleReadOptions &options = {}) final;
203
204 void
205 LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;
206
207 std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
208 LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) final;
209
210 void LoadStreamerInfo() final;
211}; // class RPageSourceFile
212
213} // namespace Internal
214} // namespace ROOT
215
216#endif
char name[80]
Definition TGX11.cxx:148
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
An interface to read from, or write to, a ROOT file, as well as performing other common operations.
Definition RFile.hxx:252
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:147
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition RMiniFile.hxx:60
A helper class for piece-wise construction of an RNTupleDescriptor.
A helper class for serializing and deserialization of the RNTuple binary format.
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
Uses standard C++ memory allocation for the column data pages.
RPagePersistentSink(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
Reference to a page stored in the page pool.
std::uint64_t fNBytesCurrentCluster
Number of bytes committed to storage in the current cluster.
void CommitBatchOfPages(CommitBatch &batch, std::vector< RNTupleLocator > &locators)
Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages con...
RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
std::unique_ptr< RPageSink > CloneAsHidden(std::string_view name, const ROOT::RNTupleWriteOptions &opts) const override
Creates a new sink with the same underlying storage as this but writing to a different RNTuple named ...
std::uint64_t StageClusterImpl() final
Returns the number of bytes written to storage (excluding metadata).
RNTupleLink CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the c...
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
RNTupleLocator CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
std::unique_ptr< ROOT::Internal::RNTupleFileWriter > fWriter
void UpdateSchema(const ROOT::Internal::RNTupleModelChangeset &changeset, ROOT::NTupleSize_t firstEntry) final
Incorporate incremental changes to the model into the ntuple descriptor.
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges, const std::vector< bool > &mask) final
Vector commit of preprocessed pages.
ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t fInfosOfClassFields
On UpdateSchema(), the new class fields register the corresponding streamer info here so that the str...
Abstract interface to write data into an ntuple.
std::int64_t fFileSize
Total file size, set once in AttachImpl().
RNTupleDescriptorBuilder fDescriptorBuilder
The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor.
std::unique_ptr< ROOT::Internal::RCluster > PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector< RRawFile::RIOVec > &readRequests)
Helper function for LoadClusters: it prepares the memory buffer (page map) and the read requests for ...
std::uint64_t fLastOffset
Tracks the last read offset for seek distance calculation.
ROOT::Internal::RCluster * fCurrentCluster
The last cluster from which a page got loaded. Points into fClusterPool->fPool.
std::unique_ptr< RPageSource > OpenWithDifferentAnchor(const ROOT::Internal::RNTupleLink &anchorLink, const ROOT::RNTupleReadOptions &options={}) final
Creates a new PageSource using the same underlying file as this but referring to a different RNTuple,...
RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Used from the RNTuple class to build a datasource if the anchor is already available.
ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final
LoadStructureImpl() has been called before AttachImpl() is called
std::unique_ptr< RFileCounters > fFileCounters
RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &options)
std::unique_ptr< RPageSource > CloneImpl() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
RStructureBuffer fStructureBuffer
Populated by LoadStructureImpl(), reset at the end of Attach().
std::unique_ptr< RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
std::optional< RNTuple > fAnchor
Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name.
ROOT::Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
RPageSource(std::string_view ntupleName, const ROOT::RNTupleReadOptions &fOptions)
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:43
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
The on-storage metadata of an RNTuple.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Generic information about the physical location of data.
Common user-tunable settings for reading RNTuples.
Common user-tunable settings for storing RNTuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67
Describe directory structure in memory.
Definition TDirectory.h:45
STL class.
STL class.
Namespace for ROOT features in testing.
Definition TROOT.h:100
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:151
The incremental changes to a RNTupleModel.
size_t fSize
Total size in bytes of the batch.
std::vector< const RSealedPage * > fSealedPages
The list of pages to commit.
size_t fBytesPacked
Total uncompressed size of the elements in the page batch.
File-specific I/O performance counters.
ROOT::Experimental::Detail::RNTupleCalcPerf & fSparseness
ROOT::Experimental::Detail::RNTupleCalcPerf & fSzFile
ROOT::Experimental::Detail::RNTupleAtomicCounter & fSzSkip
ROOT::Experimental::Detail::RNTupleCalcPerf & fRandomness
Holds the uncompressed header and footer.
void * fPtrHeader
either nullptr or points into fBuffer
void Reset()
Called at the end of Attach(), i.e. when the header and footer are processed.
void * fPtrFooter
either nullptr or points into fBuffer
std::unique_ptr< unsigned char[]> fBuffer
single buffer for both header and footer
Summarizes cluster-level information that is necessary to load a certain page.
A sealed page contains the bytes of a page as written to storage (packed & compressed).