16#ifndef ROOT7_RPageStorage
17#define ROOT7_RPageStorage
33#include <unordered_set>
37namespace Experimental {
45class RColumnElementBase;
46class RNTupleCompressor;
47class RNTupleDecompressor;
75 virtual void AddTask(
const std::function<
void(
void)> &taskFunc) = 0;
205 int compressionSetting,
void *buf);
228 static std::unique_ptr<RPageSink>
Create(std::string_view ntupleName, std::string_view location,
333 static std::unique_ptr<RPageSource>
Create(std::string_view ntupleName, std::string_view location,
336 virtual std::unique_ptr<RPageSource>
Clone()
const = 0;
369 virtual std::vector<std::unique_ptr<RCluster>>
LoadClusters(std::span<RCluster::RKey> clusterKeys) = 0;
An in-memory subset of the packed and compressed pages of a cluster.
std::unordered_set< DescriptorId_t > ColumnSet_t
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
A collection of Counter objects with a name, a unit, and a description.
An either thread-safe or non thread safe counter for CPU ticks.
Abstract interface to write data into an ntuple.
void CommitDataset()
Finalize the current cluster and the entrire data set.
RPageSink(const RPageSink &)=delete
RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting)
Helper for streaming a page.
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
virtual RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively provide their own metrics object b...
RPageSink & operator=(RPageSink &&)=default
DescriptorId_t fLastColumnId
virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page)=0
virtual std::uint64_t CommitClusterImpl(NTupleSize_t nEntries)=0
Returns the number of bytes written to storage (excluding metadata)
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
std::unique_ptr< RCounters > fCounters
virtual void CommitDatasetImpl()=0
RNTupleDescriptorBuilder fDescriptorBuilder
RPageSink & operator=(const RPageSink &)=delete
void CommitSealedPage(DescriptorId_t columnId, const RPageStorage::RSealedPage &sealedPage)
Write a preprocessed page to storage.
void DropColumn(ColumnHandle_t) final
Unregisters a column.
std::unique_ptr< RNTupleWriteOptions > fOptions
RPageSink(RPageSink &&)=default
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page source from the file name (location)
std::uint64_t CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
virtual RNTupleLocator CommitSealedPageImpl(DescriptorId_t columnId, const RPageStorage::RSealedPage &sealedPage)=0
const RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
DescriptorId_t fLastFieldId
Building the ntuple descriptor while writing is done in the same way for all the storage sink impleme...
virtual void CreateImpl(const RNTupleModel &model)=0
DescriptorId_t fLastClusterId
NTupleSize_t fPrevClusterNEntries
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)=0
Get a new, empty page for the given column that can be filled with up to nElements.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
std::unique_ptr< RNTupleCompressor > fCompressor
Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
Abstract interface to read data from an ntuple.
virtual std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys)=0
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple time, e.g. for reading in multiple threads.
const RNTupleReadOptions & GetReadOptions() const
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
void Attach()
Open the physical storage container for the tree.
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
std::unique_ptr< unsigned char[]> UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element)
Helper for unstreaming a page.
virtual void LoadSealedPage(DescriptorId_t columnId, const RClusterIndex &clusterIndex, RSealedPage &sealedPage)=0
Read the packed and compressed bytes of a page into the memory buffer provided by selaedPage.
std::unique_ptr< RCounters > fCounters
RNTupleReadOptions fOptions
NTupleSize_t GetNEntries()
virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)=0
Another version of PopulatePage that allows to specify cluster-relative indexes.
RPageSource & operator=(RPageSource &&)=default
void DropColumn(ColumnHandle_t columnHandle) override
Unregisters a column.
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
RPageSource(const RPageSource &)=delete
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) override
Register a new column.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
virtual RNTupleDescriptor AttachImpl()=0
std::unique_ptr< RNTupleDecompressor > fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
RPageSource(RPageSource &&)=default
virtual RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively override the method and provide th...
virtual void UnzipClusterImpl(RCluster *)
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
const RNTupleDescriptor & GetDescriptor() const
RNTupleMetrics fMetrics
Wraps the I/O counters and is observed by the RNTupleReader metrics.
RPageSource & operator=(const RPageSource &)=delete
void UnzipCluster(RCluster *cluster)
Parallel decompression and unpacking of the pages in the given cluster.
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
RCluster::ColumnSet_t fActiveColumns
The active columns are implicitly defined by the model fields or views.
RNTupleDescriptor fDescriptor
The interface of a task scheduler to schedule page (de)compression tasks.
virtual void Reset()=0
Start a new set of tasks.
virtual ~RTaskScheduler()=default
virtual void Wait()=0
Blocks until all scheduled tasks finished.
virtual void AddTask(const std::function< void(void)> &taskFunc)=0
Take a callable that represents a task.
Common functionality of an ntuple storage for both reading and writing.
const std::string & GetNTupleName() const
Returns the NTuple name.
RPageStorage(const RPageStorage &other)=delete
RPageStorage(RPageStorage &&other)=default
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
void SetTaskScheduler(RTaskScheduler *taskScheduler)
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
RTaskScheduler * fTaskScheduler
virtual RNTupleMetrics & GetMetrics()=0
Page storage implementations have their own metrics.
virtual void DropColumn(ColumnHandle_t columnHandle)=0
Unregisters a column.
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
RPageStorage & operator=(const RPageStorage &other)=delete
A page is a slice of a column that is mapped into memory.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
A field translates read and write calls from/to underlying columns to/from tree values.
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The RNTupleModel encapulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distriniguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Default I/O performance counters that get registered in fMetrics.
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuZip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuWrite
RNTupleAtomicCounter & fNPageCommitted
RNTupleAtomicCounter & fTimeWallWrite
RNTupleAtomicCounter & fTimeWallZip
RNTupleAtomicCounter & fSzWritePayload
RNTupleAtomicCounter & fSzZip
Default I/O performance counters that get registered in fMetrics.
RNTupleAtomicCounter & fTimeWallRead
RNTupleAtomicCounter & fNReadV
RNTupleCalcPerf & fBandwidthReadUncompressed
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuUnzip
RNTupleAtomicCounter & fSzReadPayload
RNTupleAtomicCounter & fSzUnzip
RNTupleAtomicCounter & fNPagePopulated
RNTupleAtomicCounter & fNPageLoaded
RNTupleAtomicCounter & fNClusterLoaded
RNTupleAtomicCounter & fTimeWallUnzip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuRead
RNTupleCalcPerf & fBandwidthUnzip
RNTupleAtomicCounter & fNRead
RNTupleAtomicCounter & fSzReadOverhead
RNTupleCalcPerf & fCompressionRatio
RNTupleCalcPerf & fBandwidthReadCompressed
RNTupleCalcPerf & fFractionReadOverhead
A sealed page contains the bytes of a page as written to storage (packed & compressed).
RSealedPage(RSealedPage &&other)=default
RSealedPage & operator=(const RSealedPage &other)=delete
RSealedPage(const void *b, std::uint32_t s, std::uint32_t n)
RSealedPage(const RSealedPage &other)=delete
Generic information about the physical location of data.