Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-11-21
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorageFile
17#define ROOT7_RPageStorageFile
18
19#include <ROOT/RMiniFile.hxx>
20#include <ROOT/RNTuple.hxx>
22#include <ROOT/RNTupleZip.hxx>
23#include <ROOT/RPageStorage.hxx>
24#include <ROOT/RRawFile.hxx>
25#include <string_view>
26
27#include <array>
28#include <cstdio>
29#include <memory>
30#include <optional>
31#include <string>
32#include <utility>
33
34class TFile;
35
36namespace ROOT {
37class RNTuple; // for making RPageSourceFile a friend of RNTuple
38
39namespace Internal {
40class RRawFile;
41}
42
43namespace Experimental {
44struct RNTupleLocator;
45
46namespace Internal {
47class RClusterPool;
48class RPageAllocatorHeap;
49
50// clang-format off
51/**
52\class ROOT::Experimental::Internal::RPageSinkFile
53\ingroup NTuple
54\brief Storage provider that writes ntuple pages into a file
55
56The written file can be either in ROOT format or in RNTuple bare format.
57*/
58// clang-format on
60private:
61 // A set of pages to be committed together in a vector write.
62 // Currently we assume they're all sequential (although they may span multiple ranges).
63 struct CommitBatch {
64 /// The list of pages to commit
65 std::vector<const RSealedPage *> fSealedPages;
66 /// Total size in bytes of the batch
67 size_t fSize;
68 /// Total uncompressed size of the elements in the page batch
70 };
71
72 std::unique_ptr<RNTupleFileWriter> fWriter;
73 /// Number of bytes committed to storage in the current cluster
74 std::uint64_t fNBytesCurrentCluster = 0;
75 RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options);
76
77 /// We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the corresponding
78 /// key. It is not strictly necessary to write and read the sealed page.
79 RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked);
80
81 /// Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages
82 /// contained in `batch`. The locators for the written pages are appended to `locators`.
83 /// This procedure also updates some internal metrics of the page sink, hence it's not const.
84 /// `batch` gets reset to size 0 after the writing is done (but its begin and end are not updated).
85 void CommitBatchOfPages(CommitBatch &batch, std::vector<RNTupleLocator> &locators);
86
87protected:
89 void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final;
90 RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override;
92 CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final;
93 std::vector<RNTupleLocator>
94 CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges, const std::vector<bool> &mask) final;
95 std::uint64_t StageClusterImpl() final;
96 RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final;
98 void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
99
100public:
101 RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options);
102 RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
103 RPageSinkFile(const RPageSinkFile &) = delete;
104 RPageSinkFile &operator=(const RPageSinkFile &) = delete;
106 RPageSinkFile &operator=(RPageSinkFile &&) = default;
107 ~RPageSinkFile() override;
108}; // class RPageSinkFile
109
110// clang-format off
111/**
112\class ROOT::Experimental::Internal::RPageSourceFile
113\ingroup NTuple
114\brief Storage provider that reads ntuple pages from a file
115*/
116// clang-format on
118 friend class ROOT::RNTuple;
119
120private:
121 /// Holds the uncompressed header and footer
123 std::unique_ptr<unsigned char[]> fBuffer; ///< single buffer for both header and footer
124 void *fPtrHeader = nullptr; ///< either nullptr or points into fBuffer
125 void *fPtrFooter = nullptr; ///< either nullptr or points into fBuffer
126
127 /// Called at the end of Attach(), i.e. when the header and footer are processed
128 void Reset()
129 {
130 RStructureBuffer empty;
131 std::swap(empty, *this);
132 }
133 };
134
135 /// Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name
136 std::optional<RNTuple> fAnchor;
137 /// The last cluster from which a page got loaded. Points into fClusterPool->fPool
138 RCluster *fCurrentCluster = nullptr;
139 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
140 std::unique_ptr<ROOT::Internal::RRawFile> fFile;
141 /// Takes the fFile to read ntuple blobs from it
143 /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
145 /// The cluster pool asynchronously preloads the next few clusters
146 std::unique_ptr<RClusterPool> fClusterPool;
147 /// Populated by LoadStructureImpl(), reset at the end of Attach()
149
150 RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
151
152 /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
153 /// read requests for a given cluster and columns. The read requests are appended to
154 /// the provided vector. This way, requests can be collected for multiple clusters before
155 /// sending them to RRawFile::ReadV().
156 std::unique_ptr<RCluster>
157 PrepareSingleCluster(const RCluster::RKey &clusterKey, std::vector<ROOT::Internal::RRawFile::RIOVec> &readRequests);
158
159protected:
160 void LoadStructureImpl() final;
161 RNTupleDescriptor AttachImpl() final;
162 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
163 std::unique_ptr<RPageSource> CloneImpl() const final;
164
165 RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo,
166 ClusterSize_t::ValueType idxInCluster) final;
167
168public:
169 RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
170 RPageSourceFile(std::string_view ntupleName, std::unique_ptr<ROOT::Internal::RRawFile> file,
171 const RNTupleReadOptions &options);
172 /// Used from the RNTuple class to build a datasource if the anchor is already available.
173 /// Requires the RNTuple object to be streamed from a file.
174 static std::unique_ptr<RPageSourceFile>
175 CreateFromAnchor(const RNTuple &anchor, const RNTupleReadOptions &options = RNTupleReadOptions());
176
178 RPageSourceFile &operator=(const RPageSourceFile &) = delete;
180 RPageSourceFile &operator=(RPageSourceFile &&) = delete;
181 ~RPageSourceFile() override;
182
183 void LoadSealedPage(DescriptorId_t physicalColumnId, RClusterIndex clusterIndex, RSealedPage &sealedPage) final;
184
185 std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) final;
186}; // class RPageSourceFile
187
188} // namespace Internal
189
190} // namespace Experimental
191} // namespace ROOT
192
193#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:152
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition RMiniFile.hxx:59
A helper class for piece-wise construction of an RNTupleDescriptor.
Base class for a sink with a physical storage backend.
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
Reference to a page stored in the page pool.
Definition RPagePool.hxx:93
Storage provider that writes ntuple pages into a file.
std::uint64_t fNBytesCurrentCluster
Number of bytes committed to storage in the current cluster.
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges, const std::vector< bool > &mask) final
Vector commit of preprocessed pages.
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the c...
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
std::uint64_t StageClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
void CommitBatchOfPages(CommitBatch &batch, std::vector< RNTupleLocator > &locators)
Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages con...
std::unique_ptr< RNTupleFileWriter > fWriter
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override
Storage provider that reads ntuple pages from a file.
RNTupleDescriptorBuilder fDescriptorBuilder
The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor.
std::unique_ptr< RClusterPool > fClusterPool
The cluster pool asynchronously preloads the next few clusters.
RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
RStructureBuffer fStructureBuffer
Populated by LoadStructureImpl(), reset at the end of Attach()
std::optional< RNTuple > fAnchor
Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name.
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:46
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
The on-storage meta-data of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:69
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:156
size_t fBytesPacked
Total uncompressed size of the elements in the page batch.
std::vector< const RSealedPage * > fSealedPages
The list of pages to commit.
std::unique_ptr< unsigned char[]> fBuffer
single buffer for both header and footer
void Reset()
Called at the end of Attach(), i.e. when the header and footer are processed.
Summarizes cluster-level information that is necessary to load a certain page.
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.
Generic information about the physical location of data.