Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorage.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageStorage.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-07-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPageStorage
17#define ROOT7_RPageStorage
18
19#include <ROOT/RCluster.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RPage.hxx>
26#include <ROOT/RSpan.hxx>
27#include <ROOT/RStringView.hxx>
28
29#include <atomic>
30#include <cstddef>
31#include <deque>
32#include <functional>
33#include <memory>
34#include <shared_mutex>
35#include <unordered_set>
36#include <vector>
37
38namespace ROOT {
39namespace Experimental {
40
41class RNTupleModel;
42// TODO(jblomer): factory methods to create tree sinks and sources outside Detail namespace
43
44namespace Detail {
45
46class RColumn;
47class RColumnElementBase;
48class RNTupleCompressor;
49class RNTupleDecompressor;
50struct RNTupleModelChangeset;
51class RPagePool;
52class RFieldBase;
53
/// Tells whether a concrete page storage is a sink or a source; this is the return type of
/// RPageStorage::GetType().
54enum class EPageStorageType {
 /// The storage is a sink (cf. RPageSink, the interface to write data into an ntuple)
55 kSink,
 /// The storage is a source (cf. RPageSource, the interface to read data from an ntuple)
56 kSource,
57};
58
59// clang-format off
60/**
61\class ROOT::Experimental::Detail::RPageStorage
62\ingroup NTuple
63\brief Common functionality of an ntuple storage for both reading and writing
64
65The RPageStorage provides access to a storage container that keeps the bits of pages and clusters comprising
66an ntuple. Concrete implementations can use a TFile, a raw file, an object store, and so on.
67*/
68// clang-format on
70public:
71 /// The interface of a task scheduler to schedule page (de)compression tasks
73 public:
74 virtual ~RTaskScheduler() = default;
75 /// Start a new set of tasks
76 virtual void Reset() = 0;
77 /// Take a callable that represents a task
78 virtual void AddTask(const std::function<void(void)> &taskFunc) = 0;
79 /// Blocks until all scheduled tasks finished
80 virtual void Wait() = 0;
81 };
82
83 /// A sealed page contains the bytes of a page as written to storage (packed & compressed). It is used
84 /// as an input to UnsealPages() as well as to transfer pages between different storage media.
85 /// RSealedPage does _not_ own the buffer it is pointing to in order to not interfere with the memory management
86 /// of concrete page sink and page source implementations.
87 struct RSealedPage {
 /// Non-owning pointer to the sealed (packed & compressed) bytes; this struct has no destructor and
 /// never frees the buffer.
88 const void *fBuffer = nullptr;
 /// Size of the sealed page -- presumably the number of bytes behind fBuffer; TODO confirm
89 std::uint32_t fSize = 0;
 /// Number of elements contained in the page
90 std::uint32_t fNElements = 0;
91
 /// Creates an empty sealed page: null buffer, zero size, zero elements
92 RSealedPage() = default;
 /// Wraps the buffer `b` with size `s` and element count `n`; only the pointer is stored
93 RSealedPage(const void *b, std::uint32_t s, std::uint32_t n) : fBuffer(b), fSize(s), fNElements(n) {}
 // Sealed pages cannot be copied, only moved
94 RSealedPage(const RSealedPage &other) = delete;
95 RSealedPage& operator =(const RSealedPage &other) = delete;
96 RSealedPage(RSealedPage &&other) = default;
97 RSealedPage& operator =(RSealedPage &&other) = default;
98 };
99
100 using SealedPageSequence_t = std::deque<RSealedPage>;
101 /// A range of sealed pages referring to the same column that can be used for vector commit
104 SealedPageSequence_t::const_iterator fFirst;
105 SealedPageSequence_t::const_iterator fLast;
106
107 RSealedPageGroup(DescriptorId_t d, SealedPageSequence_t::const_iterator b, SealedPageSequence_t::const_iterator e)
109 {
110 }
111 };
112
113protected:
114 std::string fNTupleName;
117 {
118 if (!fTaskScheduler)
119 return;
122 }
123
124public:
125 explicit RPageStorage(std::string_view name);
126 RPageStorage(const RPageStorage &other) = delete;
127 RPageStorage& operator =(const RPageStorage &other) = delete;
128 RPageStorage(RPageStorage &&other) = default;
130 virtual ~RPageStorage();
131
132 /// Whether the concrete implementation is a sink or a source
134
137 const RColumn *fColumn = nullptr;
138
139 /// Returns true for a valid column handle; fColumn and fPhysicalId should always either both
140 /// be valid or both be invalid.
141 explicit operator bool() const { return fPhysicalId != kInvalidDescriptorId && fColumn; }
142 };
143 /// The column handle identifies a column with the current open page storage
145
146 /// Register a new column. When reading, the column must exist in the ntuple on disk corresponding to the meta-data.
147 /// When writing, every column can only be attached once.
148 virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) = 0;
149 /// Unregisters a column. A page source decreases the reference counter for the corresponding active column.
150 /// For a page sink, dropping columns is currently a no-op.
151 virtual void DropColumn(ColumnHandle_t columnHandle) = 0;
152
153 /// Every page store needs to be able to free pages it handed out, but sinks and sources have different means
154 /// of allocating pages.
155 virtual void ReleasePage(RPage &page) = 0;
156
157 /// Page storage implementations have their own metrics. The RPageSink and RPageSource classes provide
158 /// a default set of metrics.
160 /// Returns the NTuple name.
161 const std::string &GetNTupleName() const { return fNTupleName; }
162
163 void SetTaskScheduler(RTaskScheduler *taskScheduler) { fTaskScheduler = taskScheduler; }
164};
165
166// clang-format off
167/**
168\class ROOT::Experimental::Detail::RPageSink
169\ingroup NTuple
170\brief Abstract interface to write data into an ntuple
171
172The page sink takes the list of columns and afterwards a series of page commits and cluster commits.
173The user is responsible to commit clusters at a consistent point, i.e. when all pages corresponding to data
174up to the given entry number are committed.
175*/
176// clang-format on
177class RPageSink : public RPageStorage {
178private:
179 /// Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization
181
182protected:
183 /// Default I/O performance counters that get registered in fMetrics
184 struct RCounters {
192 };
193 std::unique_ptr<RCounters> fCounters;
195
196 std::unique_ptr<RNTupleWriteOptions> fOptions;
197
198 /// Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
199 /// There could be concrete page sinks that don't need a compressor. Therefore, and in order to stay consistent
200 /// with the page source, we leave it up to the derived class whether or not the compressor gets constructed.
201 std::unique_ptr<RNTupleCompressor> fCompressor;
202
203 /// Remembers the starting cluster id for the next cluster group
204 std::uint64_t fNextClusterInGroup = 0;
205 /// Used to calculate the number of entries in the current cluster
207 /// Keeps track of the number of elements in the currently open cluster. Indexed by column id.
208 std::vector<RClusterDescriptor::RColumnRange> fOpenColumnRanges;
209 /// Keeps track of the written pages in the currently open cluster. Indexed by column id.
210 std::vector<RClusterDescriptor::RPageRange> fOpenPageRanges;
212
213 virtual void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length) = 0;
214 virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) = 0;
215 virtual RNTupleLocator
216 CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) = 0;
217 /// Vector commit of preprocessed pages. The `ranges` array specifies a range of sealed pages to be
218 /// committed for each column. The returned vector contains, in order, the RNTupleLocator for each
219 /// page on each range in `ranges`, i.e. the first N entries refer to the N pages in `ranges[0]`,
220 /// followed by M entries that refer to the M pages in `ranges[1]`, etc.
221 /// The default is to call `CommitSealedPageImpl` for each page; derived classes may provide an
222 /// optimized implementation though.
223 virtual std::vector<RNTupleLocator> CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges);
224 /// Returns the number of bytes written to storage (excluding metadata)
225 virtual std::uint64_t CommitClusterImpl(NTupleSize_t nEntries) = 0;
226 /// Returns the locator of the page list envelope of the given buffer that contains the serialized page list.
227 /// Typically, the implementation takes care of compressing and writing the provided buffer.
228 virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) = 0;
229 virtual void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) = 0;
230
231 /// Helper for streaming a page. This is commonly used in derived, concrete page sinks. Note that if
232 /// compressionSetting is 0 (uncompressed) and the page is mappable, the returned sealed page will
233 /// point directly to the input page buffer. Otherwise, the sealed page references an internal buffer
234 /// of fCompressor. Thus, the buffer pointed to by the RSealedPage should never be freed.
235 /// Usage of this method requires construction of fCompressor.
236 RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting);
237
238 /// Seal a page using the provided buffer.
239 static RSealedPage SealPage(const RPage &page, const RColumnElementBase &element,
240 int compressionSetting, void *buf);
241
242 /// Enables the default set of metrics provided by RPageSink. `prefix` will be used as the prefix for
243 /// the counters registered in the internal RNTupleMetrics object.
244 /// This set of counters can be extended by a subclass by calling `fMetrics.MakeCounter<...>()`.
245 ///
246 /// A subclass using the default set of metrics is always responsible for updating the counters
247 /// appropriately, e.g. `fCounters->fNPageCommited.Inc()`
248 ///
249 /// Alternatively, a subclass might provide its own RNTupleMetrics object by overriding the
250 /// GetMetrics() member function.
251 void EnableDefaultMetrics(const std::string &prefix);
252
253public:
254 RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options);
255
256 RPageSink(const RPageSink&) = delete;
257 RPageSink& operator=(const RPageSink&) = delete;
258 RPageSink(RPageSink&&) = default;
260 ~RPageSink() override;
261
262 /// Guess the concrete derived page sink from the file name (location)
263 static std::unique_ptr<RPageSink> Create(std::string_view ntupleName, std::string_view location,
264 const RNTupleWriteOptions &options = RNTupleWriteOptions());
266 /// Returns the sink's write options.
267 const RNTupleWriteOptions &GetWriteOptions() const { return *fOptions; }
268
269 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
270 void DropColumn(ColumnHandle_t /*columnHandle*/) final {}
271
272 /// Physically creates the storage container to hold the ntuple (e.g., a key in a TFile or an S3 bucket)
273 /// To do so, Create() calls CreateImpl() after updating the descriptor.
274 /// Create() associates column handles to the columns referenced by the model
275 void Create(RNTupleModel &model);
276 /// Incorporate incremental changes to the model into the ntuple descriptor. This happens, e.g. if new fields were
277 /// added after the initial call to `RPageSink::Create(RNTupleModel &)`.
278 /// `firstEntry` specifies the global index for the first stored element in the added columns.
279 virtual void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry);
280
281 /// Write a page to the storage. The column must have been added before.
282 void CommitPage(ColumnHandle_t columnHandle, const RPage &page);
283 /// Write a preprocessed page to storage. The column must have been added before.
284 void CommitSealedPage(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage);
285 /// Write a vector of preprocessed pages to storage. The corresponding columns must have been added before.
286 void CommitSealedPageV(std::span<RPageStorage::RSealedPageGroup> ranges);
287 /// Finalize the current cluster and create a new one for the following data.
288 /// Returns the number of bytes written to storage (excluding meta-data).
289 std::uint64_t CommitCluster(NTupleSize_t nEntries);
290 /// Write out the page locations (page list envelope) for all the committed clusters since the last call of
291 /// CommitClusterGroup (or the beginning of writing).
292 void CommitClusterGroup();
293 /// Finalize the current cluster and the entire data set.
294 void CommitDataset();
295
296 /// Get a new, empty page for the given column that can be filled with up to nElements. If nElements is zero,
297 /// the page sink picks an appropriate size.
298 virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) = 0;
299
300 /// Returns the default metrics object. Subclasses might alternatively provide their own metrics object by overriding this.
301 RNTupleMetrics &GetMetrics() override { return fMetrics; };
302};
303
304// clang-format off
305/**
306\class ROOT::Experimental::Detail::RPageSource
307\ingroup NTuple
308\brief Abstract interface to read data from an ntuple
309
310The page source is initialized with the columns of interest. Alias columns from projected fields are mapped to the
311corresponding physical columns. Pages from the columns of interest can then be mapped into memory.
312The page source also gives access to the ntuple's meta-data.
313*/
314// clang-format on
315class RPageSource : public RPageStorage {
316public:
317 /// An RAII wrapper used for the read-only access to RPageSource::fDescriptor. See GetSharedDescriptorGuard().
320 std::shared_mutex &fLock;
321
322 public:
323 RSharedDescriptorGuard(const RNTupleDescriptor &desc, std::shared_mutex &lock) : fDescriptor(desc), fLock(lock)
324 {
325 fLock.lock_shared();
326 }
331 ~RSharedDescriptorGuard() { fLock.unlock_shared(); }
332 const RNTupleDescriptor *operator->() const { return &fDescriptor; }
333 const RNTupleDescriptor &GetRef() const { return fDescriptor; }
334 };
335
336 /// An RAII wrapper used for the writable access to RPageSource::fDescriptor. See GetExclDescriptorGuard().
339 std::shared_mutex &fLock;
340
341 public:
342 RExclDescriptorGuard(RNTupleDescriptor &desc, std::shared_mutex &lock) : fDescriptor(desc), fLock(lock)
343 {
344 fLock.lock();
345 }
351 {
353 fLock.unlock();
354 }
356 void MoveIn(RNTupleDescriptor &&desc) { fDescriptor = std::move(desc); }
357 };
358
359private:
361 mutable std::shared_mutex fDescriptorLock;
362
363protected:
364 /// Default I/O performance counters that get registered in fMetrics
365 struct RCounters {
383 };
384
385 /// Keeps track of the requested physical column IDs. When using alias columns (projected fields), physical
386 /// columns may be requested multiple times.
388 private:
389 std::vector<DescriptorId_t> fIDs;
390 std::vector<std::size_t> fRefCounters;
391
392 public:
393 void Insert(DescriptorId_t physicalColumnID);
394 void Erase(DescriptorId_t physicalColumnID);
396 };
397
398 std::unique_ptr<RCounters> fCounters;
399 /// Wraps the I/O counters and is observed by the RNTupleReader metrics
401
403 /// The active columns are implicitly defined by the model fields or views
405
406 /// Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
407 /// Not all page sources need a decompressor (e.g. virtual ones for chains and friends don't), thus we
408 /// leave it up to the derived class whether or not the decompressor gets constructed.
409 std::unique_ptr<RNTupleDecompressor> fDecompressor;
410
412 // Only called if a task scheduler is set. No-op by default.
413 virtual void UnzipClusterImpl(RCluster * /* cluster */)
414 { }
415
416 /// Helper for unstreaming a page. This is commonly used in derived, concrete page sources. The implementation
417 /// currently always makes a memory copy, even if the sealed page is uncompressed and in the final memory layout.
418 /// The optimization of directly mapping pages is left to the concrete page source implementations.
419 /// Usage of this method requires construction of fDecompressor. Memory is allocated via
420 /// `RPageAllocatorHeap`; use `RPageAllocatorHeap::DeletePage()` to deallocate returned pages.
421 RPage UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element, DescriptorId_t physicalColumnId);
422
423 /// Prepare a page range read for the column set in `clusterKey`. Specifically, pages referencing the
424 /// `kTypePageZero` locator are filled in `pageZeroMap`; otherwise, `perPageFunc` is called for each page. This is
425 /// commonly used as part of `LoadClusters()` in derived classes.
427 const RCluster::RKey &clusterKey, ROnDiskPageMap &pageZeroMap,
428 std::function<void(DescriptorId_t, NTupleSize_t, const RClusterDescriptor::RPageRange::RPageInfo &)> perPageFunc);
429
430 /// Enables the default set of metrics provided by RPageSource. `prefix` will be used as the prefix for
431 /// the counters registered in the internal RNTupleMetrics object.
432 /// A subclass using the default set of metrics is responsible for updating the counters
433 /// appropriately, e.g. `fCounters->fNRead.Inc()`
434 /// Alternatively, a subclass might provide its own RNTupleMetrics object by overriding the
435 /// GetMetrics() member function.
436 void EnableDefaultMetrics(const std::string &prefix);
437
438 /// Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further information.
440
441public:
442 RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions);
443 RPageSource(const RPageSource&) = delete;
447 ~RPageSource() override;
448 /// Guess the concrete derived page source from the file name (location)
449 static std::unique_ptr<RPageSource> Create(std::string_view ntupleName, std::string_view location,
450 const RNTupleReadOptions &options = RNTupleReadOptions());
451 /// Open the same storage multiple times, e.g. for reading in multiple threads
452 virtual std::unique_ptr<RPageSource> Clone() const = 0;
453
455 const RNTupleReadOptions &GetReadOptions() const { return fOptions; }
456
457 /// Takes the read lock for the descriptor. Multiple threads can take the lock concurrently.
458 /// The underlying std::shared_mutex, however, is neither read nor write recursive:
459 /// within one thread, only one lock (shared or exclusive) must be acquired at the same time. This requires special
460 /// care in sections protected by GetSharedDescriptorGuard() and GetExclDescriptorGuard() especially to avoid that
461 /// the locks are acquired indirectly (e.g. by a call to GetNEntries()).
462 /// As a general guideline, no other method of the page source should be called (directly or indirectly) in a
463 /// guarded section.
465 {
467 }
468
469 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) override;
470 void DropColumn(ColumnHandle_t columnHandle) override;
471
472 /// Open the physical storage container for the tree
477
478 /// Allocates and fills a page that contains the index-th element
479 virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) = 0;
480 /// Another version of PopulatePage that allows to specify cluster-relative indexes
481 virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) = 0;
482
483 /// Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage. The sealed page
484 /// can be used subsequently in a call to RPageSink::CommitSealedPage.
485 /// The fSize and fNElements members of the sealedPage parameter are always set. If sealedPage.fBuffer is nullptr,
486 /// no data will be copied but the returned size information can be used by the caller to allocate a large enough
487 /// buffer and call LoadSealedPage again.
488 virtual void
489 LoadSealedPage(DescriptorId_t physicalColumnId, const RClusterIndex &clusterIndex, RSealedPage &sealedPage) = 0;
490
491 /// Populates all the pages of the given cluster ids and columns; it is possible that some columns do not
492 /// contain any pages. The page source may load more columns than the minimal necessary set from `columns`.
493 /// To indicate which columns have been loaded, LoadClusters() must mark them with SetColumnAvailable().
494 /// That includes the ones from the `columns` that don't have pages; otherwise subsequent requests
495 /// for the cluster would assume an incomplete cluster and trigger loading again.
496 /// LoadClusters() is typically called from the I/O thread of a cluster pool, i.e. the method runs
497 /// concurrently to other methods of the page source.
498 virtual std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) = 0;
499
500 /// Parallel decompression and unpacking of the pages in the given cluster. The unzipped pages are supposed
501 /// to be preloaded in a page pool attached to the source. The method is triggered by the cluster pool's
502 /// unzip thread. It is an optional optimization, the method can safely do nothing. In particular, the
503 /// actual implementation will only run if a task scheduler is set. In practice, a task scheduler is set
504 /// if implicit multi-threading is turned on.
505 void UnzipCluster(RCluster *cluster);
506
507 /// Returns the default metrics object. Subclasses might alternatively override the method and provide their own metrics object.
508 RNTupleMetrics &GetMetrics() override { return fMetrics; };
509};
510
511} // namespace Detail
512
513} // namespace Experimental
514} // namespace ROOT
515
516#endif
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define e(i)
Definition RSha256.hxx:103
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
char name[80]
Definition TGX11.cxx:110
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:155
std::unordered_set< DescriptorId_t > ColumnSet_t
Definition RCluster.hxx:157
A column element encapsulates the translation between basic C++ types and their column representation...
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
A collection of Counter objects with a name, a unit, and a description.
An either thread-safe or non thread safe counter for CPU ticks.
A memory region that contains packed and compressed pages.
Definition RCluster.hxx:105
Abstract interface to write data into an ntuple.
virtual RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage)=0
void CommitDataset()
Finalize the current cluster and the entire data set.
RPageSink(const RPageSink &)=delete
RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting)
Helper for streaming a page.
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges)
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
RPageSink & operator=(RPageSink &&)=default
virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page)=0
virtual std::uint64_t CommitClusterImpl(NTupleSize_t nEntries)=0
Returns the number of bytes written to storage (excluding metadata)
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
virtual void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length)=0
std::unique_ptr< RCounters > fCounters
RNTupleDescriptorBuilder fDescriptorBuilder
RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively provide their own metrics object b...
RPageSink & operator=(const RPageSink &)=delete
void DropColumn(ColumnHandle_t) final
Unregisters a column.
std::unique_ptr< RNTupleWriteOptions > fOptions
virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length)=0
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
virtual void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry)
Incorporate incremental changes to the model into the ntuple descriptor.
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
std::uint64_t CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
const RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
void CommitClusterGroup()
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
virtual std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges)
Vector commit of preprocessed pages.
void CommitSealedPage(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage)
Write a preprocessed page to storage. The column must have been added before.
Internal::RNTupleSerializer::RContext fSerializationContext
Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization.
virtual void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length)=0
NTupleSize_t fPrevClusterNEntries
Used to calculate the number of entries in the current cluster.
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)=0
Get a new, empty page for the given column that can be filled with up to nElements.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
std::uint64_t fNextClusterInGroup
Remembers the starting cluster id for the next cluster group.
std::unique_ptr< RNTupleCompressor > fCompressor
Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
Keeps track of the requested physical column IDs.
An RAII wrapper used for the writable access to RPageSource::fDescriptor. See GetSharedDescriptorGuar...
RExclDescriptorGuard(RNTupleDescriptor &desc, std::shared_mutex &lock)
RExclDescriptorGuard & operator=(RExclDescriptorGuard &&)=delete
RExclDescriptorGuard & operator=(const RExclDescriptorGuard &)=delete
RExclDescriptorGuard(const RExclDescriptorGuard &)=delete
An RAII wrapper used for the read-only access to RPageSource::fDescriptor. See GetExclDescriptorGuard...
RSharedDescriptorGuard & operator=(const RSharedDescriptorGuard &)=delete
RSharedDescriptorGuard & operator=(RSharedDescriptorGuard &&)=delete
RSharedDescriptorGuard(const RSharedDescriptorGuard &)=delete
RSharedDescriptorGuard(const RNTupleDescriptor &desc, std::shared_mutex &lock)
Abstract interface to read data from an ntuple.
virtual std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys)=0
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple times, e.g. for reading in multiple threads.
const RNTupleReadOptions & GetReadOptions() const
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
void Attach()
Open the physical storage container for the tree.
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
std::unique_ptr< RCounters > fCounters
virtual void LoadSealedPage(DescriptorId_t physicalColumnId, const RClusterIndex &clusterIndex, RSealedPage &sealedPage)=0
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
RActivePhysicalColumns fActivePhysicalColumns
The active columns are implicitly defined by the model fields or views.
virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)=0
Another version of PopulatePage that allows to specify cluster-relative indexes.
void DropColumn(ColumnHandle_t columnHandle) override
Unregisters a column.
RPage UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element, DescriptorId_t physicalColumnId)
Helper for unstreaming a page.
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
RPageSource(const RPageSource &)=delete
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) override
Register a new column.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RPageSource & operator=(RPageSource &&)=delete
virtual RNTupleDescriptor AttachImpl()=0
std::unique_ptr< RNTupleDecompressor > fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
const RSharedDescriptorGuard GetSharedDescriptorGuard() const
Takes the read lock for the descriptor.
RExclDescriptorGuard GetExclDescriptorGuard()
Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further informatio...
virtual void UnzipClusterImpl(RCluster *)
RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively override the method and provide th...
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
void PrepareLoadCluster(const RCluster::RKey &clusterKey, ROnDiskPageMap &pageZeroMap, std::function< void(DescriptorId_t, NTupleSize_t, const RClusterDescriptor::RPageRange::RPageInfo &)> perPageFunc)
Prepare a page range read for the column set in clusterKey.
RNTupleMetrics fMetrics
Wraps the I/O counters and is observed by the RNTupleReader metrics.
RPageSource & operator=(const RPageSource &)=delete
void UnzipCluster(RCluster *cluster)
Parallel decompression and unpacking of the pages in the given cluster.
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
The interface of a task scheduler to schedule page (de)compression tasks.
virtual void Reset()=0
Start a new set of tasks.
virtual void Wait()=0
Blocks until all scheduled tasks finished.
virtual void AddTask(const std::function< void(void)> &taskFunc)=0
Take a callable that represents a task.
Common functionality of an ntuple storage for both reading and writing.
const std::string & GetNTupleName() const
Returns the NTuple name.
RPageStorage(const RPageStorage &other)=delete
RPageStorage(RPageStorage &&other)=default
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
std::deque< RSealedPage > SealedPageSequence_t
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
void SetTaskScheduler(RTaskScheduler *taskScheduler)
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
virtual RNTupleMetrics & GetMetrics()=0
Page storage implementations have their own metrics.
virtual void DropColumn(ColumnHandle_t columnHandle)=0
Unregisters a column.
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
RPageStorage & operator=(const RPageStorage &other)=delete
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:42
The serialization context is used for the piecewise serialization of a descriptor.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
constexpr DescriptorId_t kInvalidDescriptorId
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:159
The incremental changes to a RNTupleModel
Default I/O performance counters that get registered in fMetrics.
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuZip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuWrite
Default I/O performance counters that get registered in fMetrics.
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuUnzip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuRead
A range of sealed pages referring to the same column that can be used for vector commit.
RSealedPageGroup(DescriptorId_t d, SealedPageSequence_t::const_iterator b, SealedPageSequence_t::const_iterator e)
A sealed page contains the bytes of a page as written to storage (packed & compressed).
RSealedPage & operator=(const RSealedPage &other)=delete
RSealedPage(const void *b, std::uint32_t s, std::uint32_t n)
RSealedPage(const RSealedPage &other)=delete
We do not need to store the element size / uncompressed page size because we know to which column the...
Generic information about the physical location of data.