Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorageFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorageFile.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-11-21
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorageFile
17#define ROOT7_RPageStorageFile
18
19#include <ROOT/RMiniFile.hxx>
21#include <ROOT/RNTupleZip.hxx>
22#include <ROOT/RPageStorage.hxx>
23#include <ROOT/RRawFile.hxx>
24#include <string_view>
25
26#include <array>
27#include <cstdio>
28#include <memory>
29#include <string>
30#include <utility>
31
32class TFile;
33
34namespace ROOT {
35
36namespace Internal {
37class RRawFile;
38}
39
40namespace Experimental {
41class RNTuple; // for making RPageSourceFile a friend of RNTuple
42
43namespace Internal {
44class RClusterPool;
45class RPageAllocatorHeap;
46class RPagePool;
47
48// clang-format off
49/**
50\class ROOT::Experimental::Internal::RPageSinkFile
51\ingroup NTuple
52\brief Storage provider that writes ntuple pages into a file
53
54The written file can be either in ROOT format or in RNTuple bare format.
55*/
56// clang-format on
58private:
59 std::unique_ptr<RPageAllocatorHeap> fPageAllocator;
60
61 std::unique_ptr<RNTupleFileWriter> fWriter;
62 /// Number of bytes committed to storage in the current cluster
63 std::uint64_t fNBytesCurrentCluster = 0;
64 RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options);
65
67 std::size_t bytesPacked);
68
69protected:
71 void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final;
72 RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
74 CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final;
75 std::vector<RNTupleLocator> CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges) final;
76 std::uint64_t CommitClusterImpl() final;
77 RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final;
78 void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
79
80public:
81 RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options);
82 RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
83 RPageSinkFile(const RPageSinkFile&) = delete;
84 RPageSinkFile& operator=(const RPageSinkFile&) = delete;
86 RPageSinkFile& operator=(RPageSinkFile&&) = default;
87 ~RPageSinkFile() override;
88
89 RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
90 void ReleasePage(RPage &page) final;
91}; // class RPageSinkFile
92
93// clang-format off
94/**
95\class ROOT::Experimental::Internal::RPageSourceFile
96\ingroup NTuple
97\brief Storage provider that reads ntuple pages from a file
98*/
99// clang-format on
102
103private:
104 /// Summarizes cluster-level information that is necessary to populate a certain page.
105 /// Used by PopulatePageFromCluster().
107 DescriptorId_t fClusterId = 0;
108 /// Location of the page on disk
110 /// The first element number of the page's column in the given cluster
111 std::uint64_t fColumnOffset = 0;
112 };
113
114 /// Populated pages might be shared; the page pool might, at some point, be used by multiple page sources
115 std::shared_ptr<RPagePool> fPagePool;
116 /// The last cluster from which a page got populated. Points into fClusterPool->fPool
117 RCluster *fCurrentCluster = nullptr;
118 /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
119 std::unique_ptr<ROOT::Internal::RRawFile> fFile;
120 /// Takes the fFile to read ntuple blobs from it
122 /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
124 /// The cluster pool asynchronously preloads the next few clusters
125 std::unique_ptr<RClusterPool> fClusterPool;
126
127 /// Deserializes the header and footer into a minimal descriptor held by fDescriptorBuilder
128 void InitDescriptor(const RNTuple &anchor);
129
130 RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
131
132 RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo,
133 ClusterSize_t::ValueType idxInCluster);
134
135 /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
136 /// read requests for a given cluster and columns. The read requests are appended to
137 /// the provided vector. This way, requests can be collected for multiple clusters before
138 /// sending them to RRawFile::ReadV().
139 std::unique_ptr<RCluster> PrepareSingleCluster(
140 const RCluster::RKey &clusterKey,
141 std::vector<ROOT::Internal::RRawFile::RIOVec> &readRequests);
142
143protected:
144 RNTupleDescriptor AttachImpl() final;
145 void UnzipClusterImpl(RCluster *cluster) final;
146
147public:
148 RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
149 RPageSourceFile(std::string_view ntupleName, std::unique_ptr<ROOT::Internal::RRawFile> file,
150 const RNTupleReadOptions &options);
151 /// Used from the RNTuple class to build a datasource if the anchor is already available.
152 /// Requires the RNTuple object to be streamed from a file.
153 static std::unique_ptr<RPageSourceFile>
154 CreateFromAnchor(const RNTuple &anchor, const RNTupleReadOptions &options = RNTupleReadOptions());
155 /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
156 /// The meta-data (header and footer) is reread and parsed by the clone.
157 std::unique_ptr<RPageSource> Clone() const final;
158
160 RPageSourceFile& operator=(const RPageSourceFile&) = delete;
162 RPageSourceFile &operator=(RPageSourceFile &&) = delete;
163 ~RPageSourceFile() override;
164
165 RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final;
166 RPage PopulatePage(ColumnHandle_t columnHandle, RClusterIndex clusterIndex) final;
167 void ReleasePage(RPage &page) final;
168
169 void LoadSealedPage(DescriptorId_t physicalColumnId, RClusterIndex clusterIndex, RSealedPage &sealedPage) final;
170
171 std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) final;
172}; // class RPageSourceFile
173
174} // namespace Internal
175
176} // namespace Experimental
177} // namespace ROOT
178
179#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:152
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition RMiniFile.hxx:54
A helper class for piece-wise construction of an RNTupleDescriptor.
Base class for a sink with a physical storage backend.
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
Storage provider that writes ntuple pages into a file.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges) final
Vector commit of preprocessed pages.
std::uint64_t CommitClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
std::uint64_t fNBytesCurrentCluster
Number of bytes committed to storage in the current cluster.
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final
Get a new, empty page for the given column that can be filled with up to nElements.
std::unique_ptr< RPageAllocatorHeap > fPageAllocator
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
std::unique_ptr< RNTupleFileWriter > fWriter
void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final
Storage provider that reads ntuple pages from a file.
std::shared_ptr< RPagePool > fPagePool
Populated pages might be shared; the page pool might, at some point, be used by multiple page sources.
RNTupleDescriptorBuilder fDescriptorBuilder
The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor.
std::unique_ptr< RClusterPool > fClusterPool
The cluster pool asynchronously preloads the next few clusters.
RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
Abstract interface to read data from an ntuple.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:41
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
The on-storage meta-data of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:61
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:156
Summarizes cluster-level information that is necessary to populate a certain page.
RClusterDescriptor::RPageRange::RPageInfoExtended fPageInfo
Location of the page on disk.
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Generic information about the physical location of data.