44#include <condition_variable>
54 static std::once_flag
once;
55 std::call_once(
once, []() {
57 <<
"Do not store real data with this version of RNTuple!";
59 fCompressor = std::make_unique<RNTupleCompressor>();
101 fCounters->fNPageCommitted.Inc();
102 fCounters->fSzWritePayload.Add(
sealedPage.fSize);
117 fCounters->fSzZip.Add(
page.GetNBytes());
126 fDescriptorBuilder.GetDescriptor().GetColumnDescriptor(
physicalColumnId).GetModel().GetType());
132std::vector<ROOT::Experimental::RNTupleLocator>
136 for (
auto &
range : ranges) {
143 fDescriptorBuilder.GetDescriptor().GetColumnDescriptor(
range.fPhysicalColumnId).GetModel().GetType());
149 if (
size >= std::numeric_limits<std::int32_t>::max() ||
bytesPacked >= std::numeric_limits<std::int32_t>::max()) {
160 std::vector<ROOT::Experimental::RNTupleLocator>
locators;
161 for (
auto &
range : ranges) {
172 fCounters->fNPageCommitted.Add(
locators.size());
173 fCounters->fSzWritePayload.Add(
size);
174 fNBytesCurrentCluster +=
size;
181 auto result = fNBytesCurrentCluster;
182 fNBytesCurrentCluster = 0;
221 fPageAllocator->DeletePage(
page);
231 fClusterPool(std::make_unique<
RClusterPool>(*
this, options.GetClusterBunchSize()))
238 std::unique_ptr<ROOT::Internal::RRawFile> file,
242 fFile = std::move(file);
259 if (
anchor.GetVersionEpoch() == 0) {
260 static std::once_flag
once;
266 fDescriptorBuilder.SetOnDiskHeaderSize(
anchor.GetNBytesHeader());
267 auto buffer = std::make_unique<unsigned char[]>(
anchor.GetLenHeader());
268 auto zipBuffer = std::make_unique<unsigned char[]>(
anchor.GetNBytesHeader());
270 fDecompressor->Unzip(
zipBuffer.get(),
anchor.GetNBytesHeader(),
anchor.GetLenHeader(), buffer.get());
273 fDescriptorBuilder.AddToOnDiskFooterSize(
anchor.GetNBytesFooter());
274 buffer = std::make_unique<unsigned char[]>(
anchor.GetLenFooter());
275 zipBuffer = std::make_unique<unsigned char[]>(
anchor.GetNBytesFooter());
277 fDecompressor->Unzip(
zipBuffer.get(),
anchor.GetNBytesFooter(),
anchor.GetLenFooter(), buffer.get());
281std::unique_ptr<ROOT::Experimental::Internal::RPageSourceFile>
286 throw RException(
R__FAIL(
"This RNTuple object was not streamed from a ROOT file (TFile or descendant)"));
288 std::unique_ptr<ROOT::Internal::RRawFile>
rawFile;
292 auto url =
anchor.fFile->GetEndpointUrl();
293 auto protocol = std::string(
url->GetProtocol());
296 }
else if (protocol ==
"http" || protocol ==
"https" || protocol ==
"root" || protocol ==
"roots") {
302 auto pageSource = std::make_unique<RPageSourceFile>(
"", std::move(
rawFile), options);
314 if (fDescriptorBuilder.GetDescriptor().GetOnDiskHeaderSize() == 0) {
315 auto anchor = fReader.GetNTuple(fNTupleName).Unwrap();
319 auto desc = fDescriptorBuilder.MoveDescriptor();
321 for (
const auto &
cgDesc : desc.GetClusterGroupIterable()) {
322 auto buffer = std::make_unique<unsigned char[]>(
cgDesc.GetPageListLength());
323 auto zipBuffer = std::make_unique<unsigned char[]>(
cgDesc.GetPageListLocator().fBytesOnStorage);
324 fReader.ReadBuffer(
zipBuffer.get(),
cgDesc.GetPageListLocator().fBytesOnStorage,
325 cgDesc.GetPageListLocator().GetPosition<std::uint64_t>());
326 fDecompressor->Unzip(
zipBuffer.get(),
cgDesc.GetPageListLocator().fBytesOnStorage,
cgDesc.GetPageListLength(),
354 pageInfo.fLocator.GetPosition<std::uint64_t>());
388 fCounters->fNPageLoaded.Inc();
389 fCounters->fNRead.Inc();
393 if (!fCurrentCluster || (fCurrentCluster->GetId() !=
clusterId) || !fCurrentCluster->ContainsColumn(
columnId))
394 fCurrentCluster = fClusterPool->GetCluster(
clusterId, fActivePhysicalColumns.ToColumnSet());
402 auto onDiskPage = fCurrentCluster->GetOnDiskPage(key);
416 fPagePool->RegisterPage(
418 fCounters->fNPagePopulated.Inc();
476 fPagePool->ReturnPage(
page);
484 return std::unique_ptr<RPageSourceFile>(
clone);
487std::unique_ptr<ROOT::Experimental::Internal::RCluster>
494 std::uint64_t fOffset = 0;
495 std::uint64_t
fSize = 0;
501 auto pageZeroMap = std::make_unique<ROnDiskPageMap>();
523 std::vector<std::size_t>
gaps;
524 for (
unsigned i = 1; i <
onDiskPages.size(); ++i) {
531 for (
auto g :
gaps) {
568 s.fBufPos =
reinterpret_cast<intptr_t
>(req.
fBuffer);
574 fCounters->fSzReadPayload.Add(
szPayload);
578 auto buffer =
new unsigned char[
reinterpret_cast<intptr_t
>(req.
fBuffer) + req.
fSize];
579 auto pageMap = std::make_unique<ROnDiskPageMapHeap>(std::unique_ptr<
unsigned char []>(buffer));
597std::vector<std::unique_ptr<ROOT::Experimental::Internal::RCluster>>
600 fCounters->fNClusterLoaded.Add(
clusterKeys.size());
602 std::vector<std::unique_ptr<ROOT::Experimental::Internal::RCluster>>
clusters;
603 std::vector<ROOT::Internal::RRawFile::RIOVec>
readRequests;
618 for (std::size_t i = 0; i <
nBatch; ++i) {
640 fCounters->fNReadV.Inc();
641 fCounters->fNRead.Add(
nBatch);
658 std::vector<std::unique_ptr<RColumnElementBase>>
allElements;
669 for (
const auto &pi :
pageRange.fPageInfos) {
681 fPagePool->PreloadPage(
692 fCounters->fNPagePopulated.Add(
cluster->GetNOnDiskPages());
694 fTaskScheduler->Wait();
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
#define R__LOG_WARNING(...)
TObject * clone(const char *newname) const override
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Manages a set of clusters containing compressed and packed pages.
An in-memory subset of the packed and compressed pages of a cluster.
std::size_t GetBitsOnStorage() const
static std::unique_ptr< RColumnElementBase > Generate(EColumnType type)
If CppT == void, use the default C++ type for the given column type.
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
static Writer_t MakeMemCopyWriter(unsigned char *dest)
static RNTupleFileWriter * Append(std::string_view ntupleName, TFile &file)
Add a new RNTuple identified by ntupleName to the existing TFile.
static RNTupleFileWriter * Recreate(std::string_view ntupleName, std::string_view path, int defaultCompression, EContainerFormat containerFormat)
Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
static RResult< void > DeserializeHeader(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static RResult< void > DeserializeFooter(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static RResult< void > DeserializePageList(const void *buffer, std::uint64_t bufSize, DescriptorId_t clusterGroupId, RNTupleDescriptor &desc)
A page as being stored on disk, that is packed and compressed.
Uses standard C++ memory allocation for the column data pages.
static void DeletePage(const RPage &page)
Releases the memory pointed to by page and resets the page's information.
A closure that can free the memory associated with a mapped page.
Base class for a sink with a physical storage backend.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
virtual std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges)
Vector commit of preprocessed pages.
A thread-safe cache of column pages.
Storage provider that writes ntuple pages into a file.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges) final
Vector commit of preprocessed pages.
std::uint64_t CommitClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final
Get a new, empty page for the given column that can be filled with up to nElements.
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
~RPageSinkFile() override
RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options)
std::unique_ptr< RNTupleFileWriter > fWriter
void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final
std::unique_ptr< RNTupleCompressor > fCompressor
Helper to zip pages and header/footer; includes a 16MB (kMAXZIPBUF) zip buffer.
Storage provider that reads ntuple pages from a file.
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const RNTuple &anchor, const RNTupleReadOptions &options=RNTupleReadOptions())
Used from the RNTuple class to build a datasource if the anchor is already available.
RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options)
void UnzipClusterImpl(RCluster *cluster) final
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::vector< std::unique_ptr< RCluster > > LoadClusters(std::span< RCluster::RKey > clusterKeys) final
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
std::unique_ptr< RPageSource > Clone() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
std::unique_ptr< RCluster > PrepareSingleCluster(const RCluster::RKey &clusterKey, std::vector< ROOT::Internal::RRawFile::RIOVec > &readRequests)
Helper function for LoadClusters: it prepares the memory buffer (page map) and the read requests for ...
RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ClusterSize_t::ValueType idxInCluster)
void LoadSealedPage(DescriptorId_t physicalColumnId, RClusterIndex clusterIndex, RSealedPage &sealedPage) final
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
~RPageSourceFile() override
void InitDescriptor(const RNTuple &anchor)
Deserializes the header and footer into a minimal descriptor held by fDescriptorBuilder.
RNTupleDescriptor AttachImpl() final
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
Abstract interface to read data from an ntuple.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
std::unique_ptr< RNTupleDecompressor > fDecompressor
Helper to unzip pages and header/footer; comprises a 16MB (kMAXZIPBUF) unzip buffer.
Stores information about the cluster in which this page resides.
A page is a slice of a column that is mapped into memory.
static RPage MakePageZero(ColumnId_t columnId, ClusterSize_t::ValueType elementSize)
Make a 'zero' page for column columnId (that is comprised of 0x00 bytes only).
static const void * GetPageZeroBuffer()
Return a pointer to the page zero buffer used if there is no on-disk data for a particular deferred c...
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Base class for all ROOT issued exceptions.
The on-storage meta-data of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
int GetCompression() const
Representation of an RNTuple data set in a ROOT file.
static constexpr std::uint16_t kVersionEpoch
The RRawFileTFile wraps an open TFile, but does not take ownership.
The RRawFile provides read-only access to local and remote files.
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
const_iterator begin() const
const_iterator end() const
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The identifiers that specifies the content of a (partial) cluster.
On-disk pages within a page source are identified by the column and page number.
Summarizes cluster-level information that is necessary to populate a certain page.
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Generic information about the physical location of data.
Used for vector reads from multiple offsets into multiple buffers.
std::size_t fSize
The number of desired bytes.
void * fBuffer
The destination for reading.
std::uint64_t fOffset
The file offset.