107 const TClass *cl =
nullptr;
121 std::to_string(
field->GetTypeVersion()) +
"]"));
139 RNTupleAtomicTimer
timer(fCounters->fTimeWallWrite, fCounters->fTimeCpuWrite);
146 fCounters->fNPageCommitted.Inc();
147 fCounters->fSzWritePayload.Add(
sealedPage.GetBufferSize());
148 fNBytesCurrentCluster +=
sealedPage.GetBufferSize();
158 RNTupleAtomicTimer
timer(fCounters->fTimeWallZip, fCounters->fTimeCpuZip);
162 fCounters->fSzZip.Add(
page.GetNBytes());
169 const auto nBits = fDescriptorBuilder.GetDescriptor().GetColumnDescriptor(
physicalColumnId).GetBitsOnStorage();
176 RNTupleAtomicTimer
timer(fCounters->fTimeWallWrite, fCounters->fTimeCpuWrite);
178 std::uint64_t
offset = fWriter->ReserveBlob(
batch.fSize,
batch.fBytesPacked);
191 fCounters->fNPageCommitted.Add(
batch.fSealedPages.size());
192 fCounters->fSzWritePayload.Add(
batch.fSize);
193 fNBytesCurrentCluster +=
batch.fSize;
196 batch.fBytesPacked = 0;
197 batch.fSealedPages.clear();
200std::vector<ROOT::RNTupleLocator>
202 const std::vector<bool> &
mask)
204 const std::uint64_t
maxKeySize = fOptions->GetMaxKeySize();
207 std::vector<RNTupleLocator>
locators;
209 std::size_t
iPage = 0;
218 fDescriptorBuilder.GetDescriptor().GetColumnDescriptor(
range.fPhysicalColumnId).GetBitsOnStorage();
252 fCounters->fNPageCommitted.Inc();
253 fCounters->fSzWritePayload.Add(
sealedPageIt->GetBufferSize());
264 if (
batch.fSize > 0) {
273 auto result = fNBytesCurrentCluster;
274 fNBytesCurrentCluster = 0;
296 for (
const auto &
extraTypeInfo : fDescriptorBuilder.GetDescriptor().GetExtraTypeInfoIterable()) {
304 fWriter->UpdateStreamerInfos(fInfosOfClassFields);
310 return fWriter->Commit(GetWriteOptions().GetCompression());
313std::unique_ptr<ROOT::Internal::RPageSink>
329 "cumulative seek distance (excluding header/footer reads)"),
331 "szFile",
"B",
"total file size",
fMetrics,
334 return {true, static_cast<double>(fFileSize)};
339 "ratio of seek distance to bytes read (excluding file structure reads)",
fMetrics,
355 "ratio of bytes read to total file size (excluding file structure reads)",
fMetrics,
370 std::unique_ptr<ROOT::Internal::RRawFile> file,
374 fFile = std::move(file);
385std::unique_ptr<ROOT::Internal::RPageSourceFile>
389 throw RException(
R__FAIL(
"This RNTuple object was not streamed from a ROOT file (TFile or descendant)"));
391 std::unique_ptr<ROOT::Internal::RRawFile>
rawFile;
395 const std::string className =
anchor.fFile->IsA()->GetName();
396 const auto url =
anchor.fFile->GetEndpointUrl();
397 if (className ==
"TFile") {
399 }
else if (className ==
"TDavixFile" || className ==
"TCurlFile" || className ==
"TNetXNGFile") {
405 auto pageSource = std::make_unique<RPageSourceFile>(
"", std::move(
rawFile), options);
413 StopClusterPoolBackgroundThread();
416std::unique_ptr<ROOT::Internal::RPageSource>
425 auto pageSource = std::make_unique<RPageSourceFile>(
"", fFile->Clone(), options);
436 fAnchor = fReader.GetNTuple(fNTupleName).Unwrap();
438 fReader.SetMaxKeySize(fAnchor->GetMaxKeySize());
440 fDescriptorBuilder.SetVersion(fAnchor->GetVersionEpoch(), fAnchor->GetVersionMajor(), fAnchor->GetVersionMinor(),
441 fAnchor->GetVersionPatch());
442 fDescriptorBuilder.SetOnDiskHeaderSize(fAnchor->GetNBytesHeader());
443 fDescriptorBuilder.AddToOnDiskFooterSize(fAnchor->GetNBytesFooter());
446 const auto bufSize = fAnchor->GetNBytesHeader() + fAnchor->GetNBytesFooter() +
447 std::max(fAnchor->GetLenHeader(), fAnchor->GetLenFooter());
449 fStructureBuffer.fPtrHeader = fStructureBuffer.fBuffer.get();
450 fStructureBuffer.fPtrFooter = fStructureBuffer.fBuffer.get() + fAnchor->GetNBytesHeader();
457 (std::max(fAnchor->GetNBytesHeader(), fAnchor->GetNBytesFooter()) >
readvLimits.fMaxSingleSize) ||
458 (fAnchor->GetNBytesHeader() + fAnchor->GetNBytesFooter() >
readvLimits.fMaxTotalSize)) {
459 RNTupleAtomicTimer
timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
460 fReader.ReadBuffer(fStructureBuffer.fPtrHeader, fAnchor->GetNBytesHeader(), fAnchor->GetSeekHeader());
461 fReader.ReadBuffer(fStructureBuffer.fPtrFooter, fAnchor->GetNBytesFooter(), fAnchor->GetSeekFooter());
462 fCounters->fNRead.Add(2);
464 RNTupleAtomicTimer
timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
465 R__ASSERT(fAnchor->GetNBytesHeader() < std::numeric_limits<std::size_t>::max());
466 R__ASSERT(fAnchor->GetNBytesFooter() < std::numeric_limits<std::size_t>::max());
468 static_cast<std::size_t
>(fAnchor->GetNBytesHeader()), 0},
469 {fStructureBuffer.fPtrFooter, fAnchor->GetSeekFooter(),
470 static_cast<std::size_t
>(fAnchor->GetNBytesFooter()), 0}};
472 fCounters->fNReadV.Inc();
478 auto unzipBuf =
reinterpret_cast<unsigned char *
>(fStructureBuffer.fPtrFooter) + fAnchor->GetNBytesFooter();
490 if (fNTupleName.empty())
491 fNTupleName = fDescriptorBuilder.GetDescriptor().GetName();
494 fFile->SetBuffering(
false);
497 fFileSize = fFile->GetSize();
499 return fDescriptorBuilder.MoveDescriptor();
504 fReader.ReadBuffer(buffer,
locator.GetNBytesOnStorage(),
locator.GetPosition<std::uint64_t>());
509 RNTupleAtomicTimer
timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
512 if (fLastOffset != 0) {
514 const auto distance =
static_cast<std::uint64_t
>(
515 std::abs(
static_cast<std::int64_t
>(
offset) -
static_cast<std::int64_t
>(fLastOffset)));
516 fFileCounters->fSzSkip.Add(
distance);
519 locator.GetPosition<std::uint64_t>());
526 clone->fFile = fFile->Clone();
528 return std::unique_ptr<RPageSourceFile>(clone);
531std::unique_ptr<ROOT::Internal::RCluster>
533 std::vector<ROOT::Internal::RRawFile::RIOVec> &
readRequests)
538 std::uint64_t fOffset = 0;
539 std::uint64_t
fSize = 0;
545 auto pageZeroMap = std::make_unique<ROnDiskPageMap>();
570 std::vector<std::size_t>
gaps;
573 for (
unsigned i = 1; i <
onDiskPages.size(); ++i) {
576 gaps.emplace_back(std::max(gap, std::int64_t(0)));
584 for (
auto g :
gaps) {
606 const std::uint64_t
maxKeySize = fReader.GetMaxKeySize();
611 const std::uint64_t
overhead = std::max(
static_cast<std::int64_t
>(s.fOffset) -
readUpTo, std::int64_t(0));
612 const std::uint64_t
extent = std::max(
static_cast<std::int64_t
>(s.fOffset + s.fSize) -
readUpTo, std::int64_t(0));
616 s.fBufPos =
reinterpret_cast<intptr_t
>(
req.fBuffer) + s.fOffset -
req.fOffset;
625 req.fBuffer =
reinterpret_cast<unsigned char *
>(
req.fBuffer) +
req.fSize;
626 s.fBufPos =
reinterpret_cast<intptr_t
>(
req.fBuffer);
629 req.fOffset = s.fOffset;
633 fCounters->fSzReadPayload.Add(
szPayload);
637 auto buffer =
new unsigned char[
reinterpret_cast<intptr_t
>(
req.fBuffer) +
req.fSize];
638 auto pageMap = std::make_unique<ROOT::Internal::ROnDiskPageMapHeap>(std::unique_ptr<
unsigned char[]>(buffer));
656std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
659 fCounters->fNClusterLoaded.Add(
clusterKeys.size());
661 std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
clusters;
662 std::vector<ROOT::Internal::RRawFile::RIOVec>
readRequests;
680 for (std::size_t i = 0; i <
nBatch; ++i) {
696 for (std::size_t i = 0; i <
nBatch; ++i) {
698 if (fLastOffset != 0) {
699 const auto distance =
static_cast<std::uint64_t
>(std::abs(
700 static_cast<std::int64_t
>(
offset) -
static_cast<std::int64_t
>(fLastOffset)));
701 fFileCounters->fSzSkip.Add(
distance);
708 RNTupleAtomicTimer
timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
711 RNTupleAtomicTimer
timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
714 fCounters->fNReadV.Inc();
715 fCounters->fNRead.Add(
nBatch);
726 fReader.LoadStreamerInfo();
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
A collection of Counter objects with a name, a unit, and a description.
CounterPtrT MakeCounter(const std::string &name, Args &&... args)
An interface to read from, or write to, a ROOT file, as well as performing other common operations.
The SoA field provides I/O for an in-memory SoA layout linked to an on-disk collection of the underly...
An in-memory subset of the packed and compressed pages of a cluster.
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Helper class to compress data blocks in the ROOT compression frame format.
static std::size_t Zip(const void *from, std::size_t nbytes, int compression, void *to)
Returns the size of the compressed data, written into the provided output buffer.
Helper class to uncompress data blocks in the ROOT compression frame format.
static void Unzip(const void *from, size_t nbytes, size_t dataLen, void *to)
The nbytes parameter provides the size of the from buffer.
Write RNTuple data blocks in a TFile or a bare file container.
static std::unique_ptr< RNTupleFileWriter > Append(std::string_view ntupleName, TDirectory &fileOrDirectory, std::uint64_t maxKeySize, bool isHidden)
The directory parameter can also be a TFile object (TFile inherits from TDirectory).
static std::unique_ptr< RNTupleFileWriter > Recreate(std::string_view ntupleName, std::string_view path, EContainerFormat containerFormat, const ROOT::RNTupleWriteOptions &options)
Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
A helper class for serializing and deserialization of the RNTuple binary format.
static RResult< void > DeserializeFooter(const void *buffer, std::uint64_t bufSize, ROOT::Internal::RNTupleDescriptorBuilder &descBuilder)
static RResult< StreamerInfoMap_t > DeserializeStreamerInfos(const std::string &extraTypeInfoContent)
static RResult< void > DeserializeHeader(const void *buffer, std::uint64_t bufSize, ROOT::Internal::RNTupleDescriptorBuilder &descBuilder)
A memory region that contains packed and compressed pages.
A page as being stored on disk, that is packed and compressed.
Base class for a sink with a physical storage backend.
void UpdateSchema(const ROOT::Internal::RNTupleModelChangeset &changeset, ROOT::NTupleSize_t firstEntry) override
Incorporate incremental changes to the model into the ntuple descriptor.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
Storage provider that writes ntuple pages into a file.
void CommitBatchOfPages(CommitBatch &batch, std::vector< RNTupleLocator > &locators)
Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages con...
RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
std::unique_ptr< RPageSink > CloneAsHidden(std::string_view name, const ROOT::RNTupleWriteOptions &opts) const override
Creates a new sink with the same underlying storage as this but writing to a different RNTuple named ...
std::uint64_t StageClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override
RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked)
We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the c...
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
RNTupleLocator CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
RNTupleLink CommitDatasetImpl() final
std::unique_ptr< ROOT::Internal::RNTupleFileWriter > fWriter
~RPageSinkFile() override
void UpdateSchema(const ROOT::Internal::RNTupleModelChangeset &changeset, ROOT::NTupleSize_t firstEntry) final
Incorporate incremental changes to the model into the ntuple descriptor.
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges, const std::vector< bool > &mask) final
Vector commit of preprocessed pages.
Storage provider that reads ntuple pages from a file.
ROOT::RNTupleDescriptor AttachImpl() final
LoadStructureImpl() has been called before AttachImpl() is called
std::int64_t fFileSize
Total file size, set once in AttachImpl()
std::unique_ptr< ROOT::Internal::RCluster > PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector< RRawFile::RIOVec > &readRequests)
Helper function for LoadClusters: it prepares the memory buffer (page map) and the read requests for ...
std::unique_ptr< RPageSource > OpenWithDifferentAnchor(const ROOT::Internal::RNTupleLink &anchorLink, const ROOT::RNTupleReadOptions &options={}) final
Creates a new PageSource using the same underlying file as this but referring to a different RNTuple,...
~RPageSourceFile() override
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Used from the RNTuple class to build a datasource if the anchor is already available.
void LoadPageListImpl(const RNTupleLocator &locator, unsigned char *buffer) final
std::vector< std::unique_ptr< ROOT::Internal::RCluster > > LoadClusters(std::span< ROOT::Internal::RCluster::RKey > clusterKeys) final
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
std::unique_ptr< RFileCounters > fFileCounters
void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final
RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &options)
std::unique_ptr< RPageSource > CloneImpl() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the dat...
void LoadStructureImpl() final
Fills fStructureBuffer with the compressed header and footer.
void LoadStreamerInfo() final
Forces the loading of ROOT StreamerInfo from the underlying file.
std::unique_ptr< RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
ROOT::Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
Abstract interface to read data from an ntuple.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
ROOT::Experimental::Detail::RNTupleMetrics fMetrics
A page is a slice of a column that is mapped into memory.
The RRawFileTFile wraps an open TFile, but does not take ownership.
The RRawFile provides read-only access to local and remote files.
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
The field for a class with dictionary.
Base class for all ROOT issued exceptions.
A field translates read and write calls from/to underlying columns to/from tree values.
The on-storage metadata of an RNTuple.
Generic information about the physical location of data.
Common user-tunable settings for reading RNTuples.
Common user-tunable settings for storing RNTuples.
std::uint64_t GetMaxKeySize() const
Representation of an RNTuple data set in a ROOT file.
const_iterator begin() const
const_iterator end() const
The field for a class using ROOT standard streaming.
TClass instances represent classes, structs and namespaces in the ROOT type system.
TVirtualStreamerInfo * GetStreamerInfo(Int_t version=0, Bool_t isTransient=kFALSE) const
Returns a pointer to the TVirtualStreamerInfo object for the given version. If the object does not exist,...
Describe directory structure in memory.
const char * GetName() const override
Returns name of object.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
The identifiers that specifies the content of a (partial) cluster.
The incremental changes to an RNTupleModel
On-disk pages within a page source are identified by the column and page number.
File-specific I/O performance counters.
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Used for vector reads from multiple offsets into multiple buffers.
Information about a single page in the context of a cluster's page range.