16#ifndef ROOT7_RPageStorage
17#define ROOT7_RPageStorage
36#include <shared_mutex>
37#include <unordered_set>
41namespace Experimental {
48class RColumnElementBase;
49class RNTupleCompressor;
50class RNTupleDecompressor;
51struct RNTupleModelChangeset;
75 virtual void AddTask(
const std::function<
void(
void)> &taskFunc) = 0;
101 SealedPageSequence_t::const_iterator
fFirst;
102 SealedPageSequence_t::const_iterator
fLast;
197 bool allowAlias =
true);
269 if (
fLock !=
nullptr) {
279 if (
fLock !=
nullptr) {
328 virtual void InitImpl(
unsigned char *serializedHeader, std::uint32_t
length) = 0;
339 virtual std::vector<RNTupleLocator>
CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges);
365 static std::unique_ptr<RPageSink>
Create(std::string_view ntupleName, std::string_view location,
482 std::vector<DescriptorId_t>
fIDs;
510 void PrepareLoadCluster(
533 static std::unique_ptr<RPageSource>
Create(std::string_view ntupleName, std::string_view location,
536 virtual std::unique_ptr<RPageSource>
Clone()
const = 0;
557 void Attach() { GetExclDescriptorGuard().MoveIn(AttachImpl()); }
564 void SetEntryRange(
const REntryRange &range);
594 virtual std::vector<std::unique_ptr<RCluster>>
LoadClusters(std::span<RCluster::RKey> clusterKeys) = 0;
601 void UnzipCluster(
RCluster *cluster);
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
A collection of Counter objects with a name, a unit, and a description.
An either thread-safe or non thread safe counter for CPU ticks.
An in-memory subset of the packed and compressed pages of a cluster.
std::unordered_set< DescriptorId_t > ColumnSet_t
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RNTupleDescriptor.
const RNTupleDescriptor & GetDescriptor() const
The serialization context is used for the piecewise serialization of a descriptor.
A memory region that contains packed and compressed pages.
Base class for a sink with a physical storage backend.
RPagePersistentSink(const RPagePersistentSink &)=delete
RPagePersistentSink(RPagePersistentSink &&)=default
std::uint64_t fNextClusterInGroup
Remembers the starting cluster id for the next cluster group.
RPagePersistentSink & operator=(RPagePersistentSink &&)=default
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
~RPagePersistentSink() override
RNTupleSerializer::RContext fSerializationContext
Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization.
virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page)=0
void InitFromDescriptor(const RNTupleDescriptor &descriptor)
Initialize sink based on an existing descriptor and fill into the descriptor builder.
virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length)=0
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
NTupleSize_t fPrevClusterNEntries
Used to calculate the number of entries in the current cluster.
std::vector< RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
std::uint64_t CommitCluster(NTupleSize_t nEntries) final
Finalize the current cluster and create a new one for the following data.
const RNTupleDescriptor & GetDescriptor() const final
Return the RNTupleDescriptor being constructed.
void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final
Write a page to the storage. The column must have been added before.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
virtual void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length)=0
RPagePersistentSink & operator=(const RPagePersistentSink &)=delete
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page source from the location.
void CommitDataset() final
Finalize the current cluster and the entire data set.
std::unique_ptr< RCounters > fCounters
Internal::RNTupleDescriptorBuilder fDescriptorBuilder
void CommitClusterGroup() final
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
std::vector< RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
virtual std::uint64_t CommitClusterImpl()=0
Returns the number of bytes written to storage (excluding metadata)
void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final
Incorporate incremental changes to the model into the ntuple descriptor.
void CommitSealedPage(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
Write a preprocessed page to storage. The column must have been added before.
void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges) final
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
virtual std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges)
Vector commit of preprocessed pages.
virtual RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage)=0
An RAII wrapper used to synchronize a page sink. See GetSinkGuard().
RSinkGuard(RSinkGuard &&)=delete
RSinkGuard & operator=(const RSinkGuard &)=delete
RSinkGuard(const RSinkGuard &)=delete
RSinkGuard(std::mutex *lock)
RSinkGuard & operator=(RSinkGuard &&)=delete
Abstract interface to write data into an ntuple.
bool IsInitialized() const
RPageSink & operator=(RPageSink &&)=default
bool fIsInitialized
Flag if sink was initialized.
virtual const RNTupleDescriptor & GetDescriptor() const =0
Return the RNTupleDescriptor being constructed.
void Init(RNTupleModel &model)
Physically creates the storage container to hold the ntuple (e.g., a key in a TFile or an S3 bucket) In...
RPageSink(RPageSink &&)=default
virtual void CommitPage(ColumnHandle_t columnHandle, const RPage &page)=0
Write a page to the storage. The column must have been added before.
const RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
virtual RSinkGuard GetSinkGuard()
RPageSink & operator=(const RPageSink &)=delete
RSealedPage SealPage(const RPage &page, const RColumnElementBase &element, int compressionSetting)
Helper for streaming a page.
virtual void CommitClusterGroup()=0
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)=0
Get a new, empty page for the given column that can be filled with up to nElements.
virtual std::uint64_t CommitCluster(NTupleSize_t nNewEntries)=0
Finalize the current cluster and create a new one for the following data.
RPageSink(const RPageSink &)=delete
virtual void CommitDataset()=0
Finalize the current cluster and the entire data set.
virtual void CommitSealedPage(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage)=0
Write a preprocessed page to storage. The column must have been added before.
virtual void InitImpl(RNTupleModel &model)=0
void DropColumn(ColumnHandle_t) final
Unregisters a column.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
std::unique_ptr< RNTupleCompressor > fCompressor
Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
virtual void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry)=0
Incorporate incremental changes to the model into the ntuple descriptor.
virtual void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges)=0
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
std::unique_ptr< RNTupleWriteOptions > fOptions
Keeps track of the requested physical column IDs.
std::vector< DescriptorId_t > fIDs
std::vector< std::size_t > fRefCounters
An RAII wrapper used for the writable access to RPageSource::fDescriptor. See GetSharedDescriptorGuar...
RExclDescriptorGuard(RNTupleDescriptor &desc, std::shared_mutex &lock)
void MoveIn(RNTupleDescriptor &&desc)
RExclDescriptorGuard(const RExclDescriptorGuard &)=delete
std::shared_mutex & fLock
RExclDescriptorGuard & operator=(RExclDescriptorGuard &&)=delete
RNTupleDescriptor & fDescriptor
RExclDescriptorGuard & operator=(const RExclDescriptorGuard &)=delete
RNTupleDescriptor * operator->() const
RExclDescriptorGuard(RExclDescriptorGuard &&)=delete
An RAII wrapper used for the read-only access to RPageSource::fDescriptor. See GetExclDescriptorGuard...
const RNTupleDescriptor * operator->() const
const RNTupleDescriptor & GetRef() const
std::shared_mutex & fLock
RSharedDescriptorGuard & operator=(RSharedDescriptorGuard &&)=delete
const RNTupleDescriptor & fDescriptor
RSharedDescriptorGuard(const RSharedDescriptorGuard &)=delete
RSharedDescriptorGuard(RSharedDescriptorGuard &&)=delete
RSharedDescriptorGuard(const RNTupleDescriptor &desc, std::shared_mutex &lock)
~RSharedDescriptorGuard()
RSharedDescriptorGuard & operator=(const RSharedDescriptorGuard &)=delete
Abstract interface to read data from an ntuple.
virtual void LoadSealedPage(DescriptorId_t physicalColumnId, RClusterIndex clusterIndex, RSealedPage &sealedPage)=0
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
EPageStorageType GetType() final
Whether the concrete implementation is a sink or a source.
RPageSource(const RPageSource &)=delete
RPageSource & operator=(RPageSource &&)=delete
std::shared_mutex fDescriptorLock
RNTupleReadOptions fOptions
std::unique_ptr< RCounters > fCounters
virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex)=0
Allocates and fills a page that contains the index-th element.
RNTupleDescriptor fDescriptor
RExclDescriptorGuard GetExclDescriptorGuard()
Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further informatio...
RActivePhysicalColumns fActivePhysicalColumns
The active columns are implicitly defined by the model fields or views.
RPageSource & operator=(const RPageSource &)=delete
virtual RNTupleDescriptor AttachImpl()=0
RPageSource(RPageSource &&)=delete
virtual std::unique_ptr< RPageSource > Clone() const =0
Open the same storage multiple times, e.g. for reading in multiple threads.
const RNTupleReadOptions & GetReadOptions() const
virtual std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys)=0
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
REntryRange fEntryRange
Used by the cluster pool to prevent reading beyond the given range.
REntryRange GetEntryRange() const
std::unique_ptr< RNTupleDecompressor > fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
const RSharedDescriptorGuard GetSharedDescriptorGuard() const
Takes the read lock for the descriptor.
virtual RPage PopulatePage(ColumnHandle_t columnHandle, RClusterIndex clusterIndex)=0
Another version of PopulatePage that allows to specify cluster-relative indexes.
void Attach()
Open the physical storage container for the tree.
virtual void UnzipClusterImpl(RCluster *)
The interface of a task scheduler to schedule page (de)compression tasks.
virtual ~RTaskScheduler()=default
virtual void Wait()=0
Blocks until all scheduled tasks finished.
virtual void AddTask(const std::function< void(void)> &taskFunc)=0
Take a callable that represents a task.
Common functionality of an ntuple storage for both reading and writing.
std::deque< RSealedPage > SealedPageSequence_t
virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column)=0
Register a new column.
virtual void DropColumn(ColumnHandle_t columnHandle)=0
Unregisters a column.
virtual Detail::RNTupleMetrics & GetMetrics()
Returns the default metrics object.
RTaskScheduler * fTaskScheduler
const std::string & GetNTupleName() const
Returns the NTuple name.
virtual EPageStorageType GetType()=0
Whether the concrete implementation is a sink or a source.
RPageStorage & operator=(const RPageStorage &other)=delete
RPageStorage(const RPageStorage &other)=delete
RPageStorage(RPageStorage &&other)=default
virtual void ReleasePage(RPage &page)=0
Every page store needs to be able to free pages it handed out.
void SetTaskScheduler(RTaskScheduler *taskScheduler)
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
Detail::RNTupleMetrics fMetrics
A page is a slice of a column that is mapped into memory.
Meta-data for a set of ntuple clusters.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Base class for all ROOT issued exceptions.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The identifiers that specify the content of a (partial) cluster.
The incremental changes to a RNTupleModel
Default I/O performance counters that get registered in fMetrics.
Detail::RNTupleAtomicCounter & fSzZip
Detail::RNTupleAtomicCounter & fTimeWallZip
Detail::RNTupleTickCounter< Detail::RNTupleAtomicCounter > & fTimeCpuZip
Detail::RNTupleAtomicCounter & fSzWritePayload
Detail::RNTupleTickCounter< Detail::RNTupleAtomicCounter > & fTimeCpuWrite
Detail::RNTupleAtomicCounter & fTimeWallWrite
Detail::RNTupleAtomicCounter & fNPageCommitted
Default I/O performance counters that get registered in fMetrics.
Detail::RNTupleTickCounter< Detail::RNTupleAtomicCounter > & fTimeCpuUnzip
Detail::RNTupleAtomicCounter & fSzReadPayload
Detail::RNTupleAtomicCounter & fTimeWallUnzip
Detail::RNTupleCalcPerf & fBandwidthReadCompressed
Detail::RNTupleAtomicCounter & fNRead
Detail::RNTupleAtomicCounter & fNPagePopulated
Detail::RNTupleTickCounter< Detail::RNTupleAtomicCounter > & fTimeCpuRead
Detail::RNTupleAtomicCounter & fNClusterLoaded
Detail::RNTupleAtomicCounter & fNReadV
Detail::RNTupleAtomicCounter & fSzUnzip
Detail::RNTupleCalcPerf & fBandwidthReadUncompressed
Detail::RNTupleCalcPerf & fFractionReadOverhead
Detail::RNTupleAtomicCounter & fSzReadOverhead
Detail::RNTupleAtomicCounter & fTimeWallRead
Detail::RNTupleAtomicCounter & fNPageLoaded
Detail::RNTupleCalcPerf & fCompressionRatio
Detail::RNTupleCalcPerf & fBandwidthUnzip
Used in SetEntryRange / GetEntryRange.
DescriptorId_t fPhysicalId
A range of sealed pages referring to the same column that can be used for vector commit.
SealedPageSequence_t::const_iterator fLast
SealedPageSequence_t::const_iterator fFirst
DescriptorId_t fPhysicalColumnId
RSealedPageGroup(DescriptorId_t d, SealedPageSequence_t::const_iterator b, SealedPageSequence_t::const_iterator e)
A sealed page contains the bytes of a page as written to storage (packed & compressed).
RSealedPage(const RSealedPage &other)=delete
RSealedPage & operator=(const RSealedPage &other)=delete
RSealedPage(const void *b, std::uint32_t s, std::uint32_t n)
RSealedPage(RSealedPage &&other)=default
Generic information about the physical location of data.