Logo ROOT  
Reference Guide
RPageStorage.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorage.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-07-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorage
17#define ROOT7_RPageStorage
18
19#include <ROOT/RCluster.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RPage.hxx>
26#include <ROOT/RSpan.hxx>
27#include <ROOT/RStringView.hxx>
28
29#include <atomic>
30#include <cstddef>
31#include <deque>
32#include <functional>
33#include <memory>
34#include <shared_mutex>
35#include <unordered_set>
36#include <vector>
37
38namespace ROOT {
39namespace Experimental {
40
41class RNTupleModel;
42// TODO(jblomer): factory methods to create tree sinks and sources outside Detail namespace
43
44namespace Detail {
45
46class RColumn;
47class RColumnElementBase;
48class RNTupleCompressor;
49class RNTupleDecompressor;
50class RPagePool;
51class RFieldBase;
52
53enum class EPageStorageType {
54 kSink,
55 kSource,
56};
57
58// clang-format off
59/**
60\class ROOT::Experimental::Detail::RPageStorage
61\ingroup NTuple
62\brief Common functionality of an ntuple storage for both reading and writing
63
64The RPageStorage provides access to a storage container that keeps the bits of pages and clusters comprising
65an ntuple. Concrete implementations can use a TFile, a raw file, an object store, and so on.
66*/
67// clang-format on
69public:
70 /// The interface of a task scheduler to schedule page (de)compression tasks
72 public:
73 virtual ~RTaskScheduler() = default;
74 /// Start a new set of tasks
75 virtual void Reset() = 0;
76 /// Take a callable that represents a task
77 virtual void AddTask(const std::function<void(void)> &taskFunc) = 0;
78 /// Blocks until all scheduled tasks finished
79 virtual void Wait() = 0;
80 };
81
82 /// A sealed page contains the bytes of a page as written to storage (packed & compressed). It is used
83 /// as an input to UnsealPage() as well as to transfer pages between different storage media.
84 /// RSealedPage does _not_ own the buffer it is pointing to in order to not interfere with the memory management
85 /// of concrete page sink and page source implementations.
86 struct RSealedPage {
87 const void *fBuffer = nullptr;
88 std::uint32_t fSize = 0;
89 std::uint32_t fNElements = 0;
90
91 RSealedPage() = default;
92 RSealedPage(const void *b, std::uint32_t s, std::uint32_t n) : fBuffer(b), fSize(s), fNElements(n) {}
93 RSealedPage(const RSealedPage &other) = delete;
94 RSealedPage& operator =(const RSealedPage &other) = delete;
95 RSealedPage(RSealedPage &&other) = default;
96 RSealedPage& operator =(RSealedPage &&other) = default;
97 };
98
99 using SealedPageSequence_t = std::deque<RSealedPage>;
100 /// A range of sealed pages referring to the same column that can be used for vector commit
103 SealedPageSequence_t::const_iterator fFirst;
104 SealedPageSequence_t::const_iterator fLast;
105
106 RSealedPageGroup(DescriptorId_t d, SealedPageSequence_t::const_iterator b, SealedPageSequence_t::const_iterator e)
107 : fColumnId(d), fFirst(b), fLast(e)
108 {
109 }
110 };
111
112protected:
113 std::string fNTupleName;
115
116public:
118 RPageStorage(const RPageStorage &other) = delete;
119 RPageStorage& operator =(const RPageStorage &other) = delete;
120 RPageStorage(RPageStorage &&other) = default;
122 virtual ~RPageStorage();
123
124 /// Whether the concrete implementation is a sink or a source
126
129 const RColumn *fColumn = nullptr;
130
131 /// Returns true for a valid column handle; fColumn and fId should always either both
132 /// be valid or both be invalid.
133 explicit operator bool() const { return fId != kInvalidDescriptorId && fColumn; }
134 };
135 /// The column handle identifies a column with the current open page storage
137
138 /// Register a new column. When reading, the column must exist in the ntuple on disk corresponding to the meta-data.
139 /// When writing, every column can only be attached once.
140 virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) = 0;
141 /// Unregisters a column. A page source decreases the reference counter for the corresponding active column.
142 /// For a page sink, dropping columns is currently a no-op.
143 virtual void DropColumn(ColumnHandle_t columnHandle) = 0;
144
145 /// Every page store needs to be able to free pages it handed out. But sinks and sources have different means
146 /// of allocating pages.
147 virtual void ReleasePage(RPage &page) = 0;
148
149 /// Page storage implementations have their own metrics. The RPageSink and RPageSource classes provide
150 /// a default set of metrics.
152 /// Returns the NTuple name.
153 const std::string &GetNTupleName() const { return fNTupleName; }
154
155 void SetTaskScheduler(RTaskScheduler *taskScheduler) { fTaskScheduler = taskScheduler; }
156};
157
158// clang-format off
159/**
160\class ROOT::Experimental::Detail::RPageSink
161\ingroup NTuple
162\brief Abstract interface to write data into an ntuple
163
164The page sink takes the list of columns and afterwards a series of page commits and cluster commits.
165The user is responsible for committing clusters at a consistent point, i.e. when all pages corresponding to data
166up to the given entry number are committed.
167*/
168// clang-format on
169class RPageSink : public RPageStorage {
170private:
171 /// Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization
173
174protected:
175 /// Default I/O performance counters that get registered in fMetrics
176 struct RCounters {
184 };
185 std::unique_ptr<RCounters> fCounters;
187
188 std::unique_ptr<RNTupleWriteOptions> fOptions;
189
190 /// Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
191 /// There could be concrete page sinks that don't need a compressor. Therefore, and in order to stay consistent
192 /// with the page source, we leave it up to the derived class whether or not the compressor gets constructed.
193 std::unique_ptr<RNTupleCompressor> fCompressor;
194
195 /// Remembers the starting cluster id for the next cluster group
196 std::uint64_t fNextClusterInGroup = 0;
197 /// Used to calculate the number of entries in the current cluster
199 /// Keeps track of the number of elements in the currently open cluster. Indexed by column id.
200 std::vector<RClusterDescriptor::RColumnRange> fOpenColumnRanges;
201 /// Keeps track of the written pages in the currently open cluster. Indexed by column id.
202 std::vector<RClusterDescriptor::RPageRange> fOpenPageRanges;
204
205 virtual void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length) = 0;
206 virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) = 0;
208 const RPageStorage::RSealedPage &sealedPage) = 0;
209 /// Vector commit of preprocessed pages. The `ranges` array specifies a range of sealed pages to be
210 /// committed for each column. The returned vector contains, in order, the RNTupleLocator for each
211 /// page on each range in `ranges`, i.e. the first N entries refer to the N pages in `ranges[0]`,
212 /// followed by M entries that refer to the M pages in `ranges[1]`, etc.
213 /// The default is to call `CommitSealedPageImpl` for each page; derived classes may provide an
214 /// optimized implementation though.
215 virtual std::vector<RNTupleLocator> CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges);
216 /// Returns the number of bytes written to storage (excluding metadata)
217 virtual std::uint64_t CommitClusterImpl(NTupleSize_t nEntries) = 0;
218 /// Returns the locator of the page list envelope of the given buffer that contains the serialized page list.
219 /// Typically, the implementation takes care of compressing and writing the provided buffer.
220 virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) = 0;
221 virtual void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) = 0;
222
223 /// Helper for streaming a page. This is commonly used in derived, concrete page sinks. Note that if
224 /// compressionSetting is 0 (uncompressed) and the page is mappable, the returned sealed page will
225 /// point directly to the input page buffer. Otherwise, the sealed page references an internal buffer
226 /// of fCompressor. Thus, the buffer pointed to by the RSealedPage should never be freed.
227 /// Usage of this method requires construction of fCompressor.
228 RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting);
229
230 /// Seal a page using the provided buffer.
231 static RSealedPage SealPage(const RPage &page, const RColumnElementBase &element,
232 int compressionSetting, void *buf);
233
234 /// Enables the default set of metrics provided by RPageSink. `prefix` will be used as the prefix for
235 /// the counters registered in the internal RNTupleMetrics object.
236 /// This set of counters can be extended by a subclass by calling `fMetrics.MakeCounter<...>()`.
237 ///
238 /// A subclass using the default set of metrics is always responsible for updating the counters
239 /// appropriately, e.g. `fCounters->fNPageCommited.Inc()`
240 ///
241 /// Alternatively, a subclass might provide its own RNTupleMetrics object by overriding the
242 /// GetMetrics() member function.
243 void EnableDefaultMetrics(const std::string &prefix);
244
245public:
246 RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options);
247
248 RPageSink(const RPageSink&) = delete;
249 RPageSink& operator=(const RPageSink&) = delete;
250 RPageSink(RPageSink&&) = default;
252 ~RPageSink() override;
253
254 /// Guess the concrete derived page sink from the file name (location)
255 static std::unique_ptr<RPageSink> Create(std::string_view ntupleName, std::string_view location,
256 const RNTupleWriteOptions &options = RNTupleWriteOptions());
258 /// Returns the sink's write options.
259 const RNTupleWriteOptions &GetWriteOptions() const { return *fOptions; }
260
261 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
262 void DropColumn(ColumnHandle_t /*columnHandle*/) final {}
263
264 /// Physically creates the storage container to hold the ntuple (e.g., keys in a TFile or an S3 bucket)
265 /// To do so, Create() calls CreateImpl() after updating the descriptor.
266 /// Create() associates column handles to the columns referenced by the model
267 void Create(RNTupleModel &model);
268 /// Write a page to the storage. The column must have been added before.
269 void CommitPage(ColumnHandle_t columnHandle, const RPage &page);
270 /// Write a preprocessed page to storage. The column must have been added before.
271 void CommitSealedPage(DescriptorId_t columnId, const RPageStorage::RSealedPage &sealedPage);
272 /// Write a vector of preprocessed pages to storage. The corresponding columns must have been added before.
273 void CommitSealedPageV(std::span<RPageStorage::RSealedPageGroup> ranges);
274 /// Finalize the current cluster and create a new one for the following data.
275 /// Returns the number of bytes written to storage (excluding meta-data).
276 std::uint64_t CommitCluster(NTupleSize_t nEntries);
277 /// Write out the page locations (page list envelope) for all the committed clusters since the last call of
278 /// CommitClusterGroup (or the beginning of writing).
279 void CommitClusterGroup();
280 /// Finalize the current cluster and the entire data set.
281 void CommitDataset();
282
283 /// Get a new, empty page for the given column that can be filled with up to nElements. If nElements is zero,
284 /// the page sink picks an appropriate size.
285 virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) = 0;
286
287 /// Returns the default metrics object. Subclasses might alternatively provide their own metrics object by overriding this.
288 RNTupleMetrics &GetMetrics() override { return fMetrics; };
289};
290
291// clang-format off
292/**
293\class ROOT::Experimental::Detail::RPageSource
294\ingroup NTuple
295\brief Abstract interface to read data from an ntuple
296
297The page source is initialized with the columns of interest. Pages from those columns can then be
298mapped into memory. The page source also gives access to the ntuple's meta-data.
299*/
300// clang-format on
301class RPageSource : public RPageStorage {
302public:
303 /// An RAII wrapper used for the read-only access to RPageSource::fDescriptor. See GetExclDescriptorGuard().
306 std::shared_mutex &fLock;
307
308 public:
309 RSharedDescriptorGuard(const RNTupleDescriptor &desc, std::shared_mutex &lock) : fDescriptor(desc), fLock(lock)
310 {
311 fLock.lock_shared();
312 }
317 ~RSharedDescriptorGuard() { fLock.unlock_shared(); }
318 const RNTupleDescriptor *operator->() const { return &fDescriptor; }
319 const RNTupleDescriptor &GetRef() const { return fDescriptor; }
320 };
321
322 /// An RAII wrapper used for the writable access to RPageSource::fDescriptor. See GetSharedDescriptorGuard().
325 std::shared_mutex &fLock;
326
327 public:
328 RExclDescriptorGuard(RNTupleDescriptor &desc, std::shared_mutex &lock) : fDescriptor(desc), fLock(lock)
329 {
330 fLock.lock();
331 }
337 {
339 fLock.unlock();
340 }
342 void MoveIn(RNTupleDescriptor &&desc) { fDescriptor = std::move(desc); }
343 };
344
345private:
347 mutable std::shared_mutex fDescriptorLock;
348
349protected:
350 /// Default I/O performance counters that get registered in fMetrics
351 struct RCounters {
369 };
370 std::unique_ptr<RCounters> fCounters;
371 /// Wraps the I/O counters and is observed by the RNTupleReader metrics
373
375 /// The active columns are implicitly defined by the model fields or views
377
378 /// Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
379 /// Not all page sources need a decompressor (e.g. virtual ones for chains and friends don't), thus we
380 /// leave it up to the derived class whether or not the decompressor gets constructed.
381 std::unique_ptr<RNTupleDecompressor> fDecompressor;
382
384 // Only called if a task scheduler is set. No-op by default.
385 virtual void UnzipClusterImpl(RCluster * /* cluster */)
386 { }
387
388 /// Helper for unstreaming a page. This is commonly used in derived, concrete page sources. The implementation
389 /// currently always makes a memory copy, even if the sealed page is uncompressed and in the final memory layout.
390 /// The optimization of directly mapping pages is left to the concrete page source implementations.
391 /// Usage of this method requires construction of fDecompressor.
392 std::unique_ptr<unsigned char []> UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element);
393
394 /// Enables the default set of metrics provided by RPageSource. `prefix` will be used as the prefix for
395 /// the counters registered in the internal RNTupleMetrics object.
396 /// A subclass using the default set of metrics is responsible for updating the counters
397 /// appropriately, e.g. `fCounters->fNRead.Inc()`
398 /// Alternatively, a subclass might provide its own RNTupleMetrics object by overriding the
399 /// GetMetrics() member function.
400 void EnableDefaultMetrics(const std::string &prefix);
401
402 /// Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further information.
404
405public:
407 RPageSource(const RPageSource&) = delete;
411 ~RPageSource() override;
412 /// Guess the concrete derived page source from the file name (location)
413 static std::unique_ptr<RPageSource> Create(std::string_view ntupleName, std::string_view location,
414 const RNTupleReadOptions &options = RNTupleReadOptions());
415 /// Open the same storage multiple times, e.g. for reading in multiple threads
416 virtual std::unique_ptr<RPageSource> Clone() const = 0;
417
419 const RNTupleReadOptions &GetReadOptions() const { return fOptions; }
420
421 /// Takes the read lock for the descriptor. Multiple threads can take the lock concurrently.
422 /// The underlying std::shared_mutex, however, is neither read nor write recursive:
423 /// within one thread, only one lock (shared or exclusive) must be acquired at the same time. This requires special
424 /// care in sections protected by GetSharedDescriptorGuard() and GetExclDescriptorGuard() especially to avoid that
425 /// the locks are acquired indirectly (e.g. by a call to GetNEntries()).
426 /// As a general guideline, no other method of the page source should be called (directly or indirectly) in a
427 /// guarded section.
429 {
431 }
432
433 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) override;
434 void DropColumn(ColumnHandle_t columnHandle) override;
435
436 /// Open the physical storage container for the tree
441
442 /// Allocates and fills a page that contains the index-th element
443 virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) = 0;
444 /// Another version of PopulatePage that allows specifying cluster-relative indexes
445 virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) = 0;
446
447 /// Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage. The sealed page
448 /// can be used subsequently in a call to RPageSink::CommitSealedPage.
449 /// The fSize and fNElements members of the sealedPage parameter are always set. If sealedPage.fBuffer is nullptr,
450 /// no data will be copied but the returned size information can be used by the caller to allocate a large enough
451 /// buffer and call LoadSealedPage again.
452 virtual void LoadSealedPage(DescriptorId_t columnId, const RClusterIndex &clusterIndex, RSealedPage &sealedPage) = 0;
453
454 /// Populates all the pages of the given cluster ids and columns; it is possible that some columns do not
455 /// contain any pages. The page source may load more columns than the minimal necessary set from `columns`.
456 /// To indicate which columns have been loaded, LoadClusters() must mark them with SetColumnAvailable().
457 /// That includes the ones from the `columns` that don't have pages; otherwise subsequent requests
458 /// for the cluster would assume an incomplete cluster and trigger loading again.
459 /// LoadClusters() is typically called from the I/O thread of a cluster pool, i.e. the method runs
460 /// concurrently to other methods of the page source.
461 virtual std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) = 0;
462
463 /// Parallel decompression and unpacking of the pages in the given cluster. The unzipped pages are supposed
464 /// to be preloaded in a page pool attached to the source. The method is triggered by the cluster pool's
465 /// unzip thread. It is an optional optimization, the method can safely do nothing. In particular, the
466 /// actual implementation will only run if a task scheduler is set. In practice, a task scheduler is set
467 /// if implicit multi-threading is turned on.
468 void UnzipCluster(RCluster *cluster);
469
470 /// Returns the default metrics object. Subclasses might alternatively override the method and provide their own metrics object.
471 RNTupleMetrics &GetMetrics() override { return fMetrics; };
472};
473
474} // namespace Detail
475
476} // namespace Experimental
477} // namespace ROOT
478
479#endif
#define d(i)
Definition: RSha256.hxx:102
#define e(i)
Definition: RSha256.hxx:103
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t b
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
char name[80]
Definition: TGX11.cxx:110
An in-memory subset of the packed and compressed pages of a cluster.
Definition: RCluster.hxx:154
std::unordered_set< DescriptorId_t > ColumnSet_t
Definition: RCluster.hxx:156
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
A collection of Counter objects with a name, a unit, and a description.
An either thread-safe or non thread safe counter for CPU ticks.
Abstract interface to write data into an ntuple.
void CommitDataset()
Finalize the current cluster and the entire data set.
RPageSink(const RPageSink &)=delete
RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting)
Helper for streaming a page.
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges)
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
RPageSink & operator=(RPageSink &&)=default
virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page)=0
RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options)
virtual std::uint64_t CommitClusterImpl(NTupleSize_t nEntries)=0
Returns the number of bytes written to storage (excluding metadata)
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
virtual void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length)=0
std::unique_ptr< RCounters > fCounters
RNTupleDescriptorBuilder fDescriptorBuilder
RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively provide their own metrics object b...
RPageSink & operator=(const RPageSink &)=delete
void CommitSealedPage(DescriptorId_t columnId, const RPageStorage::RSealedPage &sealedPage)
Write a preprocessed page to storage. The column must have been added before.
void DropColumn(ColumnHandle_t) final
Unregisters a column.
std::unique_ptr< RNTupleWriteOptions > fOptions
virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length)=0
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
std::uint64_t CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
virtual RNTupleLocator CommitSealedPageImpl(DescriptorId_t columnId, const RPageStorage::RSealedPage &sealedPage)=0
const RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
void CommitClusterGroup()
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
virtual std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges)
Vector commit of preprocessed pages.
Internal::RNTupleSerializer::RContext fSerializationContext
Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization.
virtual void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length)=0
NTupleSize_t fPrevClusterNEntries
Used to calculate the number of entries in the current cluster.
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)=0
Get a new, empty page for the given column that can be filled with up to nElements.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
std::uint64_t fNextClusterInGroup
Remembers the starting cluster id for the next cluster group.
std::unique_ptr< RNTupleCompressor > fCompressor
Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
An RAII wrapper used for the writable access to RPageSource::fDescriptor. See GetSharedDescriptorGuar...
RExclDescriptorGuard(RNTupleDescriptor &desc, std::shared_mutex &lock)
RExclDescriptorGuard & operator=(RExclDescriptorGuard &&)=delete
RExclDescriptorGuard & operator=(const RExclDescriptorGuard &)=delete
RExclDescriptorGuard(const RExclDescriptorGuard &)=delete
An RAII wrapper used for the read-only access to RPageSource::fDescriptor. See GetExclDescriptorGuard...
RSharedDescriptorGuard & operator=(const RSharedDescriptorGuard &)=delete
RSharedDescriptorGuard & operator=(RSharedDescriptorGuard &&)=delete
RSharedDescriptorGuard(const RSharedDescriptorGuard &)=delete
RSharedDescriptorGuard(const RNTupleDescriptor &desc, std::shared_mutex &lock)
Abstract interface to read data from an ntuple.
virtual std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys)=0
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple times, e.g. for reading in multiple threads.
const RNTupleReadOptions & GetReadOptions() const
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
void Attach()
Open the physical storage container for the tree.
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
std::unique_ptr< unsigned char[]> UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element)
Helper for unstreaming a page.
virtual void LoadSealedPage(DescriptorId_t columnId, const RClusterIndex &clusterIndex, RSealedPage &sealedPage)=0
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
std::unique_ptr< RCounters > fCounters
virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)=0
Another version of PopulatePage that allows to specify cluster-relative indexes.
void DropColumn(ColumnHandle_t columnHandle) override
Unregisters a column.
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
RPageSource(const RPageSource &)=delete
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) override
Register a new column.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RPageSource & operator=(RPageSource &&)=delete
virtual RNTupleDescriptor AttachImpl()=0
std::unique_ptr< RNTupleDecompressor > fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
const RSharedDescriptorGuard GetSharedDescriptorGuard() const
Takes the read lock for the descriptor.
RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions)
RExclDescriptorGuard GetExclDescriptorGuard()
Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further informatio...
virtual void UnzipClusterImpl(RCluster *)
RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively override the method and provide th...
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
RNTupleMetrics fMetrics
Wraps the I/O counters and is observed by the RNTupleReader metrics.
RPageSource & operator=(const RPageSource &)=delete
void UnzipCluster(RCluster *cluster)
Parallel decompression and unpacking of the pages in the given cluster.
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
RCluster::ColumnSet_t fActiveColumns
The active columns are implicitly defined by the model fields or views.
The interface of a task scheduler to schedule page (de)compression tasks.
virtual void Reset()=0
Start a new set of tasks.
virtual void Wait()=0
Blocks until all scheduled tasks finished.
virtual void AddTask(const std::function< void(void)> &taskFunc)=0
Take a callable that represents a task.
Common functionality of an ntuple storage for both reading and writing.
const std::string & GetNTupleName() const
Returns the NTuple name.
RPageStorage(const RPageStorage &other)=delete
RPageStorage(RPageStorage &&other)=default
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
std::deque< RSealedPage > SealedPageSequence_t
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
void SetTaskScheduler(RTaskScheduler *taskScheduler)
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
virtual RNTupleMetrics & GetMetrics()=0
Page storage implementations have their own metrics.
virtual void DropColumn(ColumnHandle_t columnHandle)=0
Unregisters a column.
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
RPageStorage & operator=(const RPageStorage &other)=delete
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
The serialization context is used for the piecewise serialization of a descriptor.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition: RNTupleUtil.hxx:87
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
A field translates read and write calls from/to underlying columns to/from tree values.
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:47
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:83
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:79
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:84
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:167
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
static constexpr double s
Default I/O performance counters that get registered in fMetrics.
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuZip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuWrite
Default I/O performance counters that get registered in fMetrics.
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuUnzip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuRead
A range of sealed pages referring to the same column that can be used for vector commit.
RSealedPageGroup(DescriptorId_t d, SealedPageSequence_t::const_iterator b, SealedPageSequence_t::const_iterator e)
A sealed page contains the bytes of a page as written to storage (packed & compressed).
RSealedPage & operator=(const RSealedPage &other)=delete
RSealedPage(const void *b, std::uint32_t s, std::uint32_t n)
RSealedPage(const RSealedPage &other)=delete
Generic information about the physical location of data.