16#ifndef ROOT7_RPageStorage
17#define ROOT7_RPageStorage
34#include <shared_mutex>
35#include <unordered_set>
39namespace Experimental {
47class RColumnElementBase;
48class RNTupleCompressor;
49class RNTupleDecompressor;
50struct RNTupleModelChangeset;
78 virtual void AddTask(
const std::function<
void(
void)> &taskFunc) = 0;
104 SealedPageSequence_t::const_iterator
fFirst;
105 SealedPageSequence_t::const_iterator
fLast;
223 virtual std::vector<RNTupleLocator>
CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges);
240 int compressionSetting,
void *buf);
263 static std::unique_ptr<RPageSink>
Create(std::string_view ntupleName, std::string_view location,
389 std::vector<DescriptorId_t>
fIDs;
449 static std::unique_ptr<RPageSource>
Create(std::string_view ntupleName, std::string_view location,
452 virtual std::unique_ptr<RPageSource>
Clone()
const = 0;
498 virtual std::vector<std::unique_ptr<RCluster>>
LoadClusters(std::span<RCluster::RKey> clusterKeys) = 0;
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
An in-memory subset of the packed and compressed pages of a cluster.
std::unordered_set< DescriptorId_t > ColumnSet_t
A column element encapsulates the translation between basic C++ types and their column representation...
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
A collection of Counter objects with a name, a unit, and a description.
An either thread-safe or non thread safe counter for CPU ticks.
A memory region that contains packed and compressed pages.
Abstract interface to write data into an ntuple.
virtual RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage)=0
void CommitDataset()
Finalize the current cluster and the entire data set.
RPageSink(const RPageSink &)=delete
RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting)
Helper for streaming a page.
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges)
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
RPageSink & operator=(RPageSink &&)=default
virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page)=0
virtual std::uint64_t CommitClusterImpl(NTupleSize_t nEntries)=0
Returns the number of bytes written to storage (excluding metadata)
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
virtual void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length)=0
std::unique_ptr< RCounters > fCounters
RNTupleDescriptorBuilder fDescriptorBuilder
RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively provide their own metrics object b...
RPageSink & operator=(const RPageSink &)=delete
void DropColumn(ColumnHandle_t) final
Unregisters a column.
std::unique_ptr< RNTupleWriteOptions > fOptions
virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length)=0
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
virtual void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry)
Incorporate incremental changes to the model into the ntuple descriptor.
RPageSink(RPageSink &&)=default
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
std::uint64_t CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
const RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
void CommitClusterGroup()
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
virtual std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges)
Vector commit of preprocessed pages.
void CommitSealedPage(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage)
Write a preprocessed page to storage. The column must have been added before.
Internal::RNTupleSerializer::RContext fSerializationContext
Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization.
virtual void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length)=0
NTupleSize_t fPrevClusterNEntries
Used to calculate the number of entries in the current cluster.
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)=0
Get a new, empty page for the given column that can be filled with up to nElements.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
std::uint64_t fNextClusterInGroup
Remembers the starting cluster id for the next cluster group.
std::unique_ptr< RNTupleCompressor > fCompressor
Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
Keeps track of the requested physical column IDs.
std::vector< DescriptorId_t > fIDs
RCluster::ColumnSet_t ToColumnSet() const
void Erase(DescriptorId_t physicalColumnID)
void Insert(DescriptorId_t physicalColumnID)
std::vector< std::size_t > fRefCounters
An RAII wrapper used for the writable access to RPageSource::fDescriptor. See GetSharedDescriptorGuar...
RExclDescriptorGuard(RNTupleDescriptor &desc, std::shared_mutex &lock)
RNTupleDescriptor * operator->() const
RExclDescriptorGuard(RExclDescriptorGuard &&)=delete
RExclDescriptorGuard & operator=(RExclDescriptorGuard &&)=delete
std::shared_mutex & fLock
RExclDescriptorGuard & operator=(const RExclDescriptorGuard &)=delete
RExclDescriptorGuard(const RExclDescriptorGuard &)=delete
RNTupleDescriptor & fDescriptor
void MoveIn(RNTupleDescriptor &&desc)
An RAII wrapper used for the read-only access to RPageSource::fDescriptor. See GetExclDescriptorGuard...
const RNTupleDescriptor & GetRef() const
std::shared_mutex & fLock
RSharedDescriptorGuard(RSharedDescriptorGuard &&)=delete
const RNTupleDescriptor * operator->() const
const RNTupleDescriptor & fDescriptor
RSharedDescriptorGuard & operator=(const RSharedDescriptorGuard &)=delete
~RSharedDescriptorGuard()
RSharedDescriptorGuard & operator=(RSharedDescriptorGuard &&)=delete
RSharedDescriptorGuard(const RSharedDescriptorGuard &)=delete
RSharedDescriptorGuard(const RNTupleDescriptor &desc, std::shared_mutex &lock)
Abstract interface to read data from an ntuple.
virtual std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys)=0
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple times, e.g. for reading in multiple threads.
const RNTupleReadOptions & GetReadOptions() const
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
void Attach()
Open the physical storage container for the tree.
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
std::unique_ptr< RCounters > fCounters
virtual void LoadSealedPage(DescriptorId_t physicalColumnId, const RClusterIndex &clusterIndex, RSealedPage &sealedPage)=0
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
RNTupleReadOptions fOptions
RActivePhysicalColumns fActivePhysicalColumns
The active columns are implicitly defined by the model fields or views.
NTupleSize_t GetNEntries()
virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)=0
Another version of PopulatePage that allows to specify cluster-relative indexes.
void DropColumn(ColumnHandle_t columnHandle) override
Unregisters a column.
RPage UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element, DescriptorId_t physicalColumnId)
Helper for unstreaming a page.
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
RPageSource(const RPageSource &)=delete
std::shared_mutex fDescriptorLock
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) override
Register a new column.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RPageSource & operator=(RPageSource &&)=delete
virtual RNTupleDescriptor AttachImpl()=0
std::unique_ptr< RNTupleDecompressor > fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
const RSharedDescriptorGuard GetSharedDescriptorGuard() const
Takes the read lock for the descriptor.
RExclDescriptorGuard GetExclDescriptorGuard()
Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further informatio...
virtual void UnzipClusterImpl(RCluster *)
RNTupleMetrics & GetMetrics() override
Returns the default metrics object. Subclasses might alternatively override the method and provide th...
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
void PrepareLoadCluster(const RCluster::RKey &clusterKey, ROnDiskPageMap &pageZeroMap, std::function< void(DescriptorId_t, NTupleSize_t, const RClusterDescriptor::RPageRange::RPageInfo &)> perPageFunc)
Prepare a page range read for the column set in clusterKey.
RNTupleMetrics fMetrics
Wraps the I/O counters and is observed by the RNTupleReader metrics.
RPageSource & operator=(const RPageSource &)=delete
void UnzipCluster(RCluster *cluster)
Parallel decompression and unpacking of the pages in the given cluster.
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
RNTupleDescriptor fDescriptor
RPageSource(RPageSource &&)=delete
The interface of a task scheduler to schedule page (de)compression tasks.
virtual void Reset()=0
Start a new set of tasks.
virtual ~RTaskScheduler()=default
virtual void Wait()=0
Blocks until all scheduled tasks have finished.
virtual void AddTask(const std::function< void(void)> &taskFunc)=0
Take a callable that represents a task.
Common functionality of an ntuple storage for both reading and writing.
const std::string & GetNTupleName() const
Returns the NTuple name.
RPageStorage(const RPageStorage &other)=delete
RPageStorage(RPageStorage &&other)=default
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
std::deque< RSealedPage > SealedPageSequence_t
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
void SetTaskScheduler(RTaskScheduler *taskScheduler)
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
RTaskScheduler * fTaskScheduler
virtual RNTupleMetrics & GetMetrics()=0
Page storage implementations have their own metrics.
virtual void DropColumn(ColumnHandle_t columnHandle)=0
Unregisters a column.
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
RPageStorage & operator=(const RPageStorage &other)=delete
A page is a slice of a column that is mapped into memory.
The serialization context is used for the piecewise serialization of a descriptor.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
constexpr DescriptorId_t kInvalidDescriptorId
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
The identifiers that specify the content of a (partial) cluster.
The incremental changes to a RNTupleModel
Default I/O performance counters that get registered in fMetrics.
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuZip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuWrite
RNTupleAtomicCounter & fNPageCommitted
RNTupleAtomicCounter & fTimeWallWrite
RNTupleAtomicCounter & fTimeWallZip
RNTupleAtomicCounter & fSzWritePayload
RNTupleAtomicCounter & fSzZip
Default I/O performance counters that get registered in fMetrics.
RNTupleAtomicCounter & fTimeWallRead
RNTupleAtomicCounter & fNReadV
RNTupleCalcPerf & fBandwidthReadUncompressed
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuUnzip
RNTupleAtomicCounter & fSzReadPayload
RNTupleAtomicCounter & fSzUnzip
RNTupleAtomicCounter & fNPagePopulated
RNTupleAtomicCounter & fNPageLoaded
RNTupleAtomicCounter & fNClusterLoaded
RNTupleAtomicCounter & fTimeWallUnzip
RNTupleTickCounter< RNTupleAtomicCounter > & fTimeCpuRead
RNTupleCalcPerf & fBandwidthUnzip
RNTupleAtomicCounter & fNRead
RNTupleAtomicCounter & fSzReadOverhead
RNTupleCalcPerf & fCompressionRatio
RNTupleCalcPerf & fBandwidthReadCompressed
RNTupleCalcPerf & fFractionReadOverhead
DescriptorId_t fPhysicalId
A range of sealed pages referring to the same column that can be used for vector commit.
RSealedPageGroup(DescriptorId_t d, SealedPageSequence_t::const_iterator b, SealedPageSequence_t::const_iterator e)
SealedPageSequence_t::const_iterator fLast
SealedPageSequence_t::const_iterator fFirst
DescriptorId_t fPhysicalColumnId
A sealed page contains the bytes of a page as written to storage (packed & compressed).
RSealedPage(RSealedPage &&other)=default
RSealedPage & operator=(const RSealedPage &other)=delete
RSealedPage(const void *b, std::uint32_t s, std::uint32_t n)
RSealedPage(const RSealedPage &other)=delete
Generic information about the physical location of data.