16 #ifndef ROOT7_RPageStorage
17 #define ROOT7_RPageStorage
30 #include <unordered_set>
33 namespace Experimental {
70 virtual void AddTask(const std::function<void(void)> &taskFunc) = 0;
160 static std::unique_ptr<RPageSink>
Create(std::string_view ntupleName, std::string_view location,
217 static std::unique_ptr<RPageSource>
Create(std::string_view ntupleName, std::string_view location,
220 virtual std::unique_ptr<RPageSource> Clone() const = 0;
An in-memory subset of the packed and compressed pages of a cluster.
A collection of Counter objects with a name, a unit, and a description.
Abstract interface to write data into an ntuple.
virtual RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page)=0
void CommitDataset()
Finalize the current cluster and the entire data set.
RPageSink(const RPageSink &)=delete
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
RPageSink & operator=(RPageSink &&)=default
DescriptorId_t fLastColumnId
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
virtual void CommitDatasetImpl()=0
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0)=0
Get a new, empty page for the given column that can be filled with up to nElements.
RNTupleDescriptorBuilder fDescriptorBuilder
RPageSink & operator=(const RPageSink &)=delete
void DropColumn(ColumnHandle_t) final
Unregisters a column.
void CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
RPageSink(RPageSink &&)=default
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
DescriptorId_t fLastFieldId
Building the ntuple descriptor while writing is done in the same way for all the storage sink impleme...
virtual void CreateImpl(const RNTupleModel &model)=0
virtual RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries)=0
RNTupleWriteOptions fOptions
DescriptorId_t fLastClusterId
NTupleSize_t fPrevClusterNEntries
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
Abstract interface to read data from an ntuple.
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple times, e.g. for reading in multiple threads.
void Attach()
Open the physical storage container for the tree.
virtual std::unique_ptr< RCluster > LoadCluster(DescriptorId_t clusterId, const ColumnSet_t &columns)=0
Populates all the pages of the given cluster id and columns; it is possible that some columns do not ...
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
RNTupleReadOptions fOptions
void DropColumn(ColumnHandle_t columnHandle) final
Unregisters a column.
NTupleSize_t GetNEntries()
virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)=0
Another version of PopulatePage that allows specifying cluster-relative indexes.
ColumnSet_t fActiveColumns
The active columns are implicitly defined by the model fields or views.
RPageSource & operator=(RPageSource &&)=default
std::unordered_set< DescriptorId_t > ColumnSet_t
Derived from the model (fields) that are actually being requested at a given point in time.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
RPageSource(const RPageSource &)=delete
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
virtual RNTupleDescriptor AttachImpl()=0
RPageSource(RPageSource &&)=default
virtual void UnzipClusterImpl(RCluster *)
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
const RNTupleDescriptor & GetDescriptor() const
RPageSource & operator=(const RPageSource &)=delete
void UnzipCluster(RCluster *cluster)
Parallel decompression and unpacking of the pages in the given cluster.
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
RNTupleDescriptor fDescriptor
The interface of a task scheduler to schedule page (de)compression tasks.
virtual void Reset()=0
Start a new set of tasks.
virtual ~RTaskScheduler()=default
virtual void Wait()=0
Blocks until all scheduled tasks have finished.
virtual void AddTask(const std::function< void(void)> &taskFunc)=0
Take a callable that represents a task.
Common functionality of an ntuple storage for both reading and writing.
RPageStorage(const RPageStorage &other)=delete
RPageStorage(RPageStorage &&other)=default
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
void SetTaskScheduler(RTaskScheduler *taskScheduler)
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
RTaskScheduler * fTaskScheduler
virtual void DropColumn(ColumnHandle_t columnHandle)=0
Unregisters a column.
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
virtual RNTupleMetrics & GetMetrics()
Returns an empty metrics object. Page storage implementations usually have their own metrics.
RPageStorage & operator=(const RPageStorage &other)=delete
A page is a slice of a column that is mapped into memory.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
A field translates read and write calls from/to underlying columns to/from tree values.
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Generic information about the physical location of data.