Logo ROOT  
Reference Guide
RPageStorage.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorage.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-07-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorage
17#define ROOT7_RPageStorage
18
19#include <ROOT/RCluster.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RPage.hxx>
26#include <ROOT/RSpan.hxx>
27#include <ROOT/RStringView.hxx>
28
29#include <atomic>
30#include <cstddef>
31#include <functional>
32#include <memory>
33#include <shared_mutex>
34#include <unordered_set>
35#include <vector>
36
37namespace ROOT {
38namespace Experimental {
39
40class RNTupleModel;
41// TODO(jblomer): factory methods to create tree sinks and sources outside Detail namespace
42
43namespace Detail {
44
45class RColumn;
46class RColumnElementBase;
47class RNTupleCompressor;
48class RNTupleDecompressor;
49class RPagePool;
50class RFieldBase;
51
52enum class EPageStorageType {
53 kSink, ///< The storage is a page sink (see RPageSink, the interface to write data into an ntuple)
54 kSource, ///< The storage is a page source (see RPageSource, the interface to read data from an ntuple)
55};
56
57// clang-format off
58/**
59\class ROOT::Experimental::Detail::RPageStorage
60\ingroup NTuple
61\brief Common functionality of an ntuple storage for both reading and writing
62
63The RPageStorage provides access to a storage container that keeps the bits of pages and clusters comprising
64an ntuple. Concrete implementations can use a TFile, a raw file, an object store, and so on.
65*/
66// clang-format on
68public:
69 /// The interface of a task scheduler to schedule page (de)compression tasks
71 public:
72 virtual ~RTaskScheduler() = default;
73 /// Start a new set of tasks
74 virtual void Reset() = 0;
75 /// Take a callable that represents a task
76 virtual void AddTask(const std::function<void(void)> &taskFunc) = 0;
77 /// Blocks until all scheduled tasks finished
78 virtual void Wait() = 0;
79 };
80
81 /// A sealed page contains the bytes of a page as written to storage (packed & compressed). It is used
82 /// as an input to UnsealPages() as well as to transfer pages between different storage media.
83 /// RSealedPage does _not_ own the buffer it is pointing to in order to not interfere with the memory management
84 /// of concrete page sink and page source implementations.
85 struct RSealedPage {
86 const void *fBuffer = nullptr; ///< Non-owning pointer to the sealed bytes; never freed by RSealedPage
87 std::uint32_t fSize = 0; ///< Presumably the size in bytes of the memory behind fBuffer -- TODO confirm
88 std::uint32_t fNElements = 0; ///< Presumably the number of column elements stored in the page -- TODO confirm
89
90 RSealedPage() = default; ///< Creates an empty sealed page (null buffer, zero size, zero elements)
91 RSealedPage(const void *b, std::uint32_t s, std::uint32_t n) : fBuffer(b), fSize(s), fNElements(n) {}
92 RSealedPage(const RSealedPage &other) = delete; ///< Sealed pages cannot be copied, only moved
93 RSealedPage& operator =(const RSealedPage &other) = delete;
94 RSealedPage(RSealedPage &&other) = default; ///< Defaulted member-wise move; the source's members are not reset
95 RSealedPage& operator =(RSealedPage &&other) = default;
96 };
97
98protected:
99 std::string fNTupleName;
101
102public:
104 RPageStorage(const RPageStorage &other) = delete;
105 RPageStorage& operator =(const RPageStorage &other) = delete;
106 RPageStorage(RPageStorage &&other) = default;
108 virtual ~RPageStorage();
109
110 /// Whether the concrete implementation is a sink or a source
112
115 const RColumn *fColumn = nullptr;
116
117 /// Returns true for a valid column handle; fColumn and fId should always either both
118 /// be valid or both be invalid.
119 explicit operator bool() const { return fId != kInvalidDescriptorId && fColumn; }
120 };
121 /// The column handle identifies a column with the current open page storage
123
124 /// Register a new column. When reading, the column must exist in the ntuple on disk corresponding to the meta-data.
125 /// When writing, every column can only be attached once.
126 virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) = 0;
127 /// Unregisters a column. A page source decreases the reference counter for the corresponding active column.
128 /// For a page sink, dropping columns is currently a no-op.
129 virtual void DropColumn(ColumnHandle_t columnHandle) = 0;
130
131 /// Every page store needs to be able to free pages it handed out. But Sinks and sources have different means
132 /// of allocating pages.
133 virtual void ReleasePage(RPage &page) = 0;
134
135 /// Page storage implementations have their own metrics. The RPageSink and RPageSource classes provide
136 /// a default set of metrics.
138 /// Returns the NTuple name.
139 const std::string &GetNTupleName() const { return fNTupleName; }
140
141 void SetTaskScheduler(RTaskScheduler *taskScheduler) { fTaskScheduler = taskScheduler; }
142};
143
144// clang-format off
145/**
146\class ROOT::Experimental::Detail::RPageSink
147\ingroup NTuple
148\brief Abstract interface to write data into an ntuple
149
150The page sink takes the list of columns and afterwards a series of page commits and cluster commits.
151The user is responsible to commit clusters at a consistent point, i.e. when all pages corresponding to data
152up to the given entry number are committed.
153*/
154// clang-format on
155class RPageSink : public RPageStorage {
156private:
157 /// Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization
159
160protected:
161 /// Default I/O performance counters that get registered in fMetrics
162 struct RCounters {
170 };
171 std::unique_ptr<RCounters> fCounters;
173
174 std::unique_ptr<RNTupleWriteOptions> fOptions;
175
176 /// Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
177 /// There could be concrete page sinks that don't need a compressor. Therefore, and in order to stay consistent
178 /// with the page source, we leave it up to the derived class whether or not the compressor gets constructed.
179 std::unique_ptr<RNTupleCompressor> fCompressor;
180
181 /// Remembers the starting cluster id for the next cluster group
182 std::uint64_t fNextClusterInGroup = 0;
183 /// Used to calculate the number of entries in the current cluster
185 /// Keeps track of the number of elements in the currently open cluster. Indexed by column id.
186 std::vector<RClusterDescriptor::RColumnRange> fOpenColumnRanges;
187 /// Keeps track of the written pages in the currently open cluster. Indexed by column id.
188 std::vector<RClusterDescriptor::RPageRange> fOpenPageRanges;
190
191 virtual void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length) = 0;
192 virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) = 0;
194 const RPageStorage::RSealedPage &sealedPage) = 0;
195 /// Returns the number of bytes written to storage (excluding metadata)
196 virtual std::uint64_t CommitClusterImpl(NTupleSize_t nEntries) = 0;
197 /// Returns the locator of the page list envelope of the given buffer that contains the serialized page list.
198 /// Typically, the implementation takes care of compressing and writing the provided buffer.
199 virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) = 0;
200 virtual void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) = 0;
201
202 /// Helper for streaming a page. This is commonly used in derived, concrete page sinks. Note that if
203 /// compressionSetting is 0 (uncompressed) and the page is mappable, the returned sealed page will
204 /// point directly to the input page buffer. Otherwise, the sealed page references an internal buffer
205 /// of fCompressor. Thus, the buffer pointed to by the RSealedPage should never be freed.
206 /// Usage of this method requires construction of fCompressor.
207 RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting);
208
209 /// Seal a page using the provided buffer.
210 static RSealedPage SealPage(const RPage &page, const RColumnElementBase &element,
211 int compressionSetting, void *buf);
212
213 /// Enables the default set of metrics provided by RPageSink. `prefix` will be used as the prefix for
214 /// the counters registered in the internal RNTupleMetrics object.
215 /// This set of counters can be extended by a subclass by calling `fMetrics.MakeCounter<...>()`.
216 ///
217 /// A subclass using the default set of metrics is always responsible for updating the counters
218 /// appropriately, e.g. `fCounters->fNPageCommited.Inc()`
219 ///
220 /// Alternatively, a subclass might provide its own RNTupleMetrics object by overriding the
221 /// GetMetrics() member function.
222 void EnableDefaultMetrics(const std::string &prefix);
223
224public:
225 RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options);
226
227 RPageSink(const RPageSink&) = delete;
228 RPageSink& operator=(const RPageSink&) = delete;
229 RPageSink(RPageSink&&) = default;
231 virtual ~RPageSink();
232
233 /// Guess the concrete derived page sink from the file name (location)
234 static std::unique_ptr<RPageSink> Create(std::string_view ntupleName, std::string_view location,
235 const RNTupleWriteOptions &options = RNTupleWriteOptions());
237 /// Returns the sink's write options.
238 const RNTupleWriteOptions &GetWriteOptions() const { return *fOptions; }
239
240 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
241 void DropColumn(ColumnHandle_t /*columnHandle*/) final {}
242
243 /// Physically creates the storage container to hold the ntuple (e.g., a key in a TFile or an S3 bucket)
244 /// To do so, Create() calls CreateImpl() after updating the descriptor.
245 /// Create() associates column handles to the columns referenced by the model
246 void Create(RNTupleModel &model);
247 /// Write a page to the storage. The column must have been added before.
248 void CommitPage(ColumnHandle_t columnHandle, const RPage &page);
249 /// Write a preprocessed page to storage. The column must have been added before.
250 /// TODO(jblomer): allow for vector commit of sealed pages
251 void CommitSealedPage(DescriptorId_t columnId, const RPageStorage::RSealedPage &sealedPage);
252 /// Finalize the current cluster and create a new one for the following data.
253 /// Returns the number of bytes written to storage (excluding meta-data).
254 std::uint64_t CommitCluster(NTupleSize_t nEntries);
255 /// Write out the page locations (page list envelope) for all the committed clusters since the last call of
256 /// CommitClusterGroup (or the beginning of writing).
257 void CommitClusterGroup();
258 /// Finalize the current cluster and the entire data set.
259 void CommitDataset();
260
261 /// Get a new, empty page for the given column that can be filled with up to nElements. If nElements is zero,
262 /// the page sink picks an appropriate size.
263 virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) = 0;
264
265 /// Returns the default metrics object. Subclasses might alternatively provide their own metrics object by overriding this.
266 virtual RNTupleMetrics &GetMetrics() override { return fMetrics; };
267};
268
269// clang-format off
270/**
271\class ROOT::Experimental::Detail::RPageSource
272\ingroup NTuple
273\brief Abstract interface to read data from an ntuple
274
275The page source is initialized with the columns of interest. Pages from those columns can then be
276mapped into memory. The page source also gives access to the ntuple's meta-data.
277*/
278// clang-format on
279class RPageSource : public RPageStorage {
280public:
281 /// An RAII wrapper used for the read-only access to RPageSource::fDescriptor. See GetExclDescriptorGuard().
284 std::shared_mutex &fLock;
285
286 public:
287 RSharedDescriptorGuard(const RNTupleDescriptor &desc, std::shared_mutex &lock) : fDescriptor(desc), fLock(lock)
288 {
289 fLock.lock_shared();
290 }
295 ~RSharedDescriptorGuard() { fLock.unlock_shared(); }
296 const RNTupleDescriptor *operator->() const { return &fDescriptor; }
297 const RNTupleDescriptor &GetRef() const { return fDescriptor; }
298 };
299
300 /// An RAII wrapper used for the writable access to RPageSource::fDescriptor. See GetSharedDescriptorGuard().
303 std::shared_mutex &fLock;
304
305 public:
306 RExclDescriptorGuard(RNTupleDescriptor &desc, std::shared_mutex &lock) : fDescriptor(desc), fLock(lock)
307 {
308 fLock.lock();
309 }
315 {
317 fLock.unlock();
318 }
320 void MoveIn(RNTupleDescriptor &&desc) { fDescriptor = std::move(desc); }
321 };
322
323private:
325 mutable std::shared_mutex fDescriptorLock;
326
327protected:
328 /// Default I/O performance counters that get registered in fMetrics
329 struct RCounters {
347 };
348 std::unique_ptr<RCounters> fCounters;
349 /// Wraps the I/O counters and is observed by the RNTupleReader metrics
351
353 /// The active columns are implicitly defined by the model fields or views
355
356 /// Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
357 /// Not all page sources need a decompressor (e.g. virtual ones for chains and friends don't), thus we
358 /// leave it up to the derived class whether or not the decompressor gets constructed.
359 std::unique_ptr<RNTupleDecompressor> fDecompressor;
360
362 // Only called if a task scheduler is set. No-op by default.
363 virtual void UnzipClusterImpl(RCluster * /* cluster */)
364 { }
365
366 /// Helper for unstreaming a page. This is commonly used in derived, concrete page sources. The implementation
367 /// currently always makes a memory copy, even if the sealed page is uncompressed and in the final memory layout.
368 /// The optimization of directly mapping pages is left to the concrete page source implementations.
369 /// Usage of this method requires construction of fDecompressor.
370 std::unique_ptr<unsigned char []> UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element);
371
372 /// Enables the default set of metrics provided by RPageSource. `prefix` will be used as the prefix for
373 /// the counters registered in the internal RNTupleMetrics object.
374 /// A subclass using the default set of metrics is responsible for updating the counters
375 /// appropriately, e.g. `fCounters->fNRead.Inc()`
376 /// Alternatively, a subclass might provide its own RNTupleMetrics object by overriding the
377 /// GetMetrics() member function.
378 void EnableDefaultMetrics(const std::string &prefix);
379
380 /// Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further information.
382
383public:
385 RPageSource(const RPageSource&) = delete;
389 virtual ~RPageSource();
390 /// Guess the concrete derived page source from the file name (location)
391 static std::unique_ptr<RPageSource> Create(std::string_view ntupleName, std::string_view location,
392 const RNTupleReadOptions &options = RNTupleReadOptions());
393 /// Open the same storage multiple times, e.g. for reading in multiple threads
394 virtual std::unique_ptr<RPageSource> Clone() const = 0;
395
397 const RNTupleReadOptions &GetReadOptions() const { return fOptions; }
398
399 /// Takes the read lock for the descriptor. Multiple threads can take the lock concurrently.
400 /// The underlying std::shared_mutex, however, is neither read nor write recursive:
401 /// within one thread, only one lock (shared or exclusive) must be acquired at the same time. This requires special
402 /// care in sections protected by GetSharedDescriptorGuard() and GetExclDescriptorGuard() especially to avoid that
403 /// the locks are acquired indirectly (e.g. by a call to GetNEntries()).
404 /// As a general guideline, no other method of the page source should be called (directly or indirectly) in a
405 /// guarded section.
407 {
409 }
410
411 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) override;
412 void DropColumn(ColumnHandle_t columnHandle) override;
413
414 /// Open the physical storage container for the tree
419
420 /// Allocates and fills a page that contains the index-th element
421 virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) = 0;
422 /// Another version of PopulatePage that allows to specify cluster-relative indexes
423 virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) = 0;
424
425 /// Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage. The sealed page
426 /// can be used subsequently in a call to RPageSink::CommitSealedPage.
427 /// The fSize and fNElements members of the sealedPage parameter are always set. If sealedPage.fBuffer is nullptr,
428 /// no data will be copied but the returned size information can be used by the caller to allocate a large enough
429 /// buffer and call LoadSealedPage again.
430 virtual void LoadSealedPage(DescriptorId_t columnId, const RClusterIndex &clusterIndex, RSealedPage &sealedPage) = 0;
431
432 /// Populates all the pages of the given cluster ids and columns; it is possible that some columns do not
433 /// contain any pages. The page source may load more columns than the minimal necessary set from `columns`.
434 /// To indicate which columns have been loaded, LoadClusters() must mark them with SetColumnAvailable().
435 /// That includes the ones from the `columns` that don't have pages; otherwise subsequent requests
436 /// for the cluster would assume an incomplete cluster and trigger loading again.
437 /// LoadClusters() is typically called from the I/O thread of a cluster pool, i.e. the method runs
438 /// concurrently to other methods of the page source.
439 virtual std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) = 0;
440
441 /// Parallel decompression and unpacking of the pages in the given cluster. The unzipped pages are supposed
442 /// to be preloaded in a page pool attached to the source. The method is triggered by the cluster pool's
443 /// unzip thread. It is an optional optimization, the method can safely do nothing. In particular, the
444 /// actual implementation will only run if a task scheduler is set. In practice, a task scheduler is set
445 /// if implicit multi-threading is turned on.
446 void UnzipCluster(RCluster *cluster);
447
448 /// Returns the default metrics object. Subclasses might alternatively override the method and provide their own metrics object.
449 virtual RNTupleMetrics &GetMetrics() override { return fMetrics; };
450};
451
452} // namespace Detail
453
454} // namespace Experimental
455} // namespace ROOT
456
457#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t b
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
char name[80]
Definition: TGX11.cxx:110
An in-memory subset of the packed and compressed pages of a cluster.
Definition: RCluster.hxx:154
std::unordered_set< DescriptorId_t > ColumnSet_t
Definition: RCluster.hxx:156
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
A collection of Counter objects with a name, a unit, and a description.
An either thread-safe or non thread safe counter for CPU ticks.
Abstract interface to write data into an ntuple.
void CommitDataset()
Finalize the current cluster and the entire data set.
RPageSink(const RPageSink &)=delete
RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting)
Helper for streaming a page.
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
virtual RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively provide their own metrics object b...
RPageSink & operator=(RPageSink &&)=default
virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page)=0
RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options)
virtual std::uint64_t CommitClusterImpl(NTupleSize_t nEntries)=0
Returns the number of bytes written to storage (excluding metadata)
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
virtual void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length)=0
std::unique_ptr< RCounters > fCounters
RNTupleDescriptorBuilder fDescriptorBuilder
RPageSink & operator=(const RPageSink &)=delete
void CommitSealedPage(DescriptorId_t columnId, const RPageStorage::RSealedPage &sealedPage)
Write a preprocessed page to storage.
void DropColumn(ColumnHandle_t) final
Unregisters a column.
std::unique_ptr< RNTupleWriteOptions > fOptions
virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length)=0
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
std::uint64_t CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
virtual RNTupleLocator CommitSealedPageImpl(DescriptorId_t columnId, const RPageStorage::RSealedPage &sealedPage)=0
const RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
void CommitClusterGroup()
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
Internal::RNTupleSerializer::RContext fSerializationContext
Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization.
virtual void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length)=0
NTupleSize_t fPrevClusterNEntries
Used to calculate the number of entries in the current cluster.
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)=0
Get a new, empty page for the given column that can be filled with up to nElements.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
std::uint64_t fNextClusterInGroup
Remembers the starting cluster id for the next cluster group.
std::unique_ptr< RNTupleCompressor > fCompressor
Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
An RAII wrapper used for the writable access to RPageSource::fDescriptor. See GetSharedDescriptorGuar...
RExclDescriptorGuard(RNTupleDescriptor &desc, std::shared_mutex &lock)
RExclDescriptorGuard & operator=(RExclDescriptorGuard &&)=delete
RExclDescriptorGuard & operator=(const RExclDescriptorGuard &)=delete
RExclDescriptorGuard(const RExclDescriptorGuard &)=delete
An RAII wrapper used for the read-only access to RPageSource::fDescriptor. See GetExclDescriptorGuard...
RSharedDescriptorGuard & operator=(const RSharedDescriptorGuard &)=delete
RSharedDescriptorGuard & operator=(RSharedDescriptorGuard &&)=delete
RSharedDescriptorGuard(const RSharedDescriptorGuard &)=delete
RSharedDescriptorGuard(const RNTupleDescriptor &desc, std::shared_mutex &lock)
Abstract interface to read data from an ntuple.
virtual std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys)=0
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple times, e.g. for reading in multiple threads.
const RNTupleReadOptions & GetReadOptions() const
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
void Attach()
Open the physical storage container for the tree.
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
std::unique_ptr< unsigned char[]> UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element)
Helper for unstreaming a page.
virtual void LoadSealedPage(DescriptorId_t columnId, const RClusterIndex &clusterIndex, RSealedPage &sealedPage)=0
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
std::unique_ptr< RCounters > fCounters
virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)=0
Another version of PopulatePage that allows to specify cluster-relative indexes.
void DropColumn(ColumnHandle_t columnHandle) override
Unregisters a column.
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
RPageSource(const RPageSource &)=delete
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) override
Register a new column.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RPageSource & operator=(RPageSource &&)=delete
virtual RNTupleDescriptor AttachImpl()=0
std::unique_ptr< RNTupleDecompressor > fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
const RSharedDescriptorGuard GetSharedDescriptorGuard() const
Takes the read lock for the descriptor.
RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions)
RExclDescriptorGuard GetExclDescriptorGuard()
Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further informatio...
virtual RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively override the method and provide th...
virtual void UnzipClusterImpl(RCluster *)
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
RNTupleMetrics fMetrics
Wraps the I/O counters and is observed by the RNTupleReader metrics.
RPageSource & operator=(const RPageSource &)=delete
void UnzipCluster(RCluster *cluster)
Parallel decompression and unpacking of the pages in the given cluster.
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
RCluster::ColumnSet_t fActiveColumns
The active columns are implicitly defined by the model fields or views.
The interface of a task scheduler to schedule page (de)compression tasks.
virtual void Reset()=0
Start a new set of tasks.
virtual void Wait()=0
Blocks until all scheduled tasks finished.
virtual void AddTask(const std::function< void(void)> &taskFunc)=0
Take a callable that represents a task.
Common functionality of an ntuple storage for both reading and writing.
const std::string & GetNTupleName() const
Returns the NTuple name.
RPageStorage(const RPageStorage &other)=delete
RPageStorage(RPageStorage &&other)=default
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
void SetTaskScheduler(RTaskScheduler *taskScheduler)
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
virtual RNTupleMetrics & GetMetrics()=0
Page storage implementations have their own metrics.
virtual void DropColumn(ColumnHandle_t columnHandle)=0
Unregisters a column.
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
RPageStorage & operator=(const RPageStorage &other)=delete
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
The serialization context is used for the piecewise serialization of a descriptor.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:87
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
A field translates read and write calls from/to underlying columns to/from tree values.
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:47
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:83
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:79
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:84
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:167
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
static constexpr double s
Default I/O performance counters that get registered in fMetrics.
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuZip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuWrite
Default I/O performance counters that get registered in fMetrics.
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuUnzip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuRead
A sealed page contains the bytes of a page as written to storage (packed & compressed).
RSealedPage & operator=(const RSealedPage &other)=delete
RSealedPage(const void *b, std::uint32_t s, std::uint32_t n)
RSealedPage(const RSealedPage &other)=delete
Generic information about the physical location of data.