41#include <unordered_map>
78 auto charBuf =
reinterpret_cast<const unsigned char *
>(
fBuffer);
79 auto checksumBuf =
const_cast<unsigned char *
>(charBuf) +
GetDataSize();
80 std::uint64_t xxhash3;
91 return R__FAIL(
"page checksum verification failed, data corruption detected");
98 return R__FAIL(
"invalid attempt to extract non-existing page checksum");
101 std::uint64_t checksum;
112 auto [itr,
_] =
fColumnInfos.emplace(physicalColumnId, std::vector<RColumnInfo>());
113 for (
auto &columnInfo : itr->second) {
114 if (columnInfo.fElementId == elementId) {
115 columnInfo.fRefCounter++;
119 itr->second.emplace_back(
RColumnInfo{elementId, 1});
127 for (std::size_t i = 0; i < itr->second.size(); ++i) {
128 if (itr->second[i].fElementId != elementId)
131 itr->second[i].fRefCounter--;
132 if (itr->second[i].fRefCounter == 0) {
133 itr->second.erase(itr->second.begin() + i);
134 if (itr->second.empty()) {
146 result.insert(physicalColumnId);
176std::unique_ptr<ROOT::Internal::RPageSource>
180 if (ntupleName.empty()) {
183 if (location.empty()) {
186 if (location.find(
"daos://") == 0)
188 return std::make_unique<ROOT::Experimental::Internal::RPageSourceDaos>(ntupleName, location, options);
193 return std::make_unique<ROOT::Internal::RPageSourceFile>(ntupleName, location, options);
240 clone->fHasStructure =
true;
241 clone->fIsAttached =
true;
266 const auto clusterId = cluster->
GetId();
268 const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId);
272 std::atomic<bool> foundChecksumFailure{
false};
274 std::vector<std::unique_ptr<RColumnElementBase>> allElements;
276 for (
const auto columnId : columnsInCluster) {
284 allElements.reserve(allElements.size() + columnInfos.size());
285 for (
const auto &info : columnInfos) {
288 const auto &pageRange = clusterDescriptor.GetPageRange(columnId);
289 std::uint64_t pageNo = 0;
290 std::uint64_t firstInPage = 0;
291 for (
const auto &pi : pageRange.GetPageInfos()) {
297 sealedPage.
SetBuffer(onDiskPage->GetAddress());
300 auto taskFunc = [
this, columnId, clusterId, firstInPage, sealedPage, element = allElements.back().get(),
301 &foundChecksumFailure,
302 indexOffset = clusterDescriptor.GetColumnRange(columnId).GetFirstElementIndex()]() {
306 foundChecksumFailure =
true;
309 auto newPage = rv.Unwrap();
312 newPage.SetWindow(indexOffset + firstInPage,
314 fPagePool.PreloadPage(std::move(newPage), keyPagePool);
319 firstInPage += pi.GetNElements();
329 if (foundChecksumFailure) {
330 throw RException(
R__FAIL(
"page checksum verification failed, data corruption detected"));
340 const auto &clusterDesc = descriptorGuard->GetClusterDescriptor(clusterKey.
fClusterId);
343 if (clusterDesc.GetColumnRange(physicalColumnId).IsSuppressed())
346 const auto &pageRange = clusterDesc.GetPageRange(physicalColumnId);
348 for (
const auto &pageInfo : pageRange.GetPageInfos()) {
352 pageInfo.GetLocator().GetNBytesOnStorage()));
354 perPageFunc(physicalColumnId, pageNo, pageInfo);
377 std::size_t poolWindow = 0;
402 if (!cachedPageRef.Get().IsNull()) {
404 return cachedPageRef;
407 std::uint64_t idxInCluster;
411 clusterInfo.
fClusterId = descriptorGuard->FindClusterId(columnId, globalIndex);
414 throw RException(
R__FAIL(
"entry with index " + std::to_string(globalIndex) +
" out of bounds"));
416 const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterInfo.
fClusterId);
417 const auto &columnRange = clusterDescriptor.GetColumnRange(columnId);
418 if (columnRange.IsSuppressed())
421 clusterInfo.
fColumnOffset = columnRange.GetFirstElementIndex();
424 clusterInfo.
fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(idxInCluster);
431 return LoadPageImpl(columnHandle, clusterInfo, idxInCluster);
443 if (!cachedPageRef.Get().IsNull()) {
445 return cachedPageRef;
454 const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId);
455 const auto &columnRange = clusterDescriptor.GetColumnRange(columnId);
456 if (columnRange.IsSuppressed())
460 clusterInfo.
fColumnOffset = columnRange.GetFirstElementIndex();
461 clusterInfo.
fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(idxInCluster);
468 return LoadPageImpl(columnHandle, clusterInfo, idxInCluster);
483 "number of partial clusters preloaded from storage"),
490 "CPU time spent decompressing"),
492 "bwRead",
"MB/s",
"bandwidth compressed bytes read per second",
fMetrics,
494 if (
const auto szReadPayload = metrics.GetLocalCounter(
"szReadPayload")) {
495 if (
const auto szReadOverhead = metrics.GetLocalCounter(
"szReadOverhead")) {
496 if (
const auto timeWallRead = metrics.GetLocalCounter(
"timeWallRead")) {
497 if (
auto walltime = timeWallRead->GetValueAsInt()) {
498 double payload = szReadPayload->GetValueAsInt();
499 double overhead = szReadOverhead->GetValueAsInt();
501 return {
true, (1000. * (payload + overhead) / walltime)};
509 "bwReadUnzip",
"MB/s",
"bandwidth uncompressed bytes read per second",
fMetrics,
511 if (const auto szUnzip = metrics.GetLocalCounter(
"szUnzip")) {
512 if (const auto timeWallRead = metrics.GetLocalCounter(
"timeWallRead")) {
513 if (auto walltime = timeWallRead->GetValueAsInt()) {
514 double unzip = szUnzip->GetValueAsInt();
516 return {true, 1000. * unzip / walltime};
523 "bwUnzip",
"MB/s",
"decompression bandwidth of uncompressed bytes per second",
fMetrics,
525 if (const auto szUnzip = metrics.GetLocalCounter(
"szUnzip")) {
526 if (const auto timeWallUnzip = metrics.GetLocalCounter(
"timeWallUnzip")) {
527 if (auto walltime = timeWallUnzip->GetValueAsInt()) {
528 double unzip = szUnzip->GetValueAsInt();
530 return {true, 1000. * unzip / walltime};
537 "rtReadEfficiency",
"",
"ratio of payload over all bytes read", fMetrics,
539 if (const auto szReadPayload = metrics.GetLocalCounter(
"szReadPayload")) {
540 if (const auto szReadOverhead = metrics.GetLocalCounter(
"szReadOverhead")) {
541 if (auto payload = szReadPayload->GetValueAsInt()) {
543 return {true, 1. / (1. + (1. * szReadOverhead->GetValueAsInt()) / payload)};
549 *fMetrics.MakeCounter<
RNTupleCalcPerf *>(
"rtCompression",
"",
"ratio of compressed bytes / uncompressed bytes",
550 fMetrics, [](
const RNTupleMetrics &metrics) -> std::pair<bool, double> {
551 if (const auto szReadPayload =
552 metrics.GetLocalCounter(
"szReadPayload")) {
553 if (const auto szUnzip = metrics.GetLocalCounter(
"szUnzip")) {
554 if (auto unzip = szUnzip->GetValueAsInt()) {
555 return {true, (1. * szReadPayload->GetValueAsInt()) / unzip};
578 memset(page.GetBuffer(), 0, page.GetNBytes());
595 memcpy(page.GetBuffer(), sealedPage.
GetBuffer(), bytesPacked);
601 page = std::move(tmp);
613 for (
const auto &extraTypeInfo :
fDescriptor.GetExtraTypeInfoIterable()) {
641 if (itr->fCurrentPageSize <= pageSizeLimit)
643 if (itr->fCurrentPageSize == itr->fInitialPageSize) {
649 auto itrFlush = itr++;
721 assert(config.
fPage);
725 unsigned char *pageBuf =
reinterpret_cast<unsigned char *
>(config.
fPage->
GetBuffer());
726 bool isAdoptedBuffer =
true;
732 pageBuf =
new unsigned char[nBytesPacked];
733 isAdoptedBuffer =
false;
736 auto nBytesZipped = nBytesPacked;
742 if (!isAdoptedBuffer)
744 pageBuf =
reinterpret_cast<unsigned char *
>(config.
fBuffer);
745 isAdoptedBuffer =
true;
764 config.
fPage = &page;
785 const auto nBytes = elementSize * nElements;
793std::unique_ptr<ROOT::Internal::RPageSink>
797 if (ntupleName.empty()) {
800 if (location.empty()) {
803 if (location.find(
"daos://") == 0) {
805 return std::make_unique<ROOT::Experimental::Internal::RPageSinkDaos>(ntupleName, location, options);
812 return std::make_unique<ROOT::Internal::RPageSinkFile>(ntupleName, location, options);
858 if (descriptor.GetNLogicalColumns() > descriptor.GetNPhysicalColumns()) {
862 const auto &reps =
f.GetColumnRepresentatives();
865 return reps.size() * reps[0].size();
867 std::uint32_t nNewPhysicalColumns = 0;
869 nNewPhysicalColumns += getNColumns(*
f);
870 for (
const auto &descendant : *
f)
871 nNewPhysicalColumns += getNColumns(descendant);
877 auto fieldId = descriptor.GetNFields();
880 f.SetOnDiskId(fieldId);
884 auto fieldId = descriptor.GetNFields();
890 f.SetOnDiskId(fieldId);
891 for (
const auto &source : descriptor.GetColumnIterable(sourceFieldId)) {
892 auto targetId = descriptor.GetNLogicalColumns();
899 .
Type(source.GetType())
900 .
Index(source.GetIndex())
907 const auto nColumnsBeforeUpdate = descriptor.GetNPhysicalColumns();
910 for (
auto &descendant : *
f)
911 addField(descendant);
914 addProjectedField(*
f);
915 for (
auto &descendant : *
f)
916 addProjectedField(descendant);
919 const auto nColumns = descriptor.GetNPhysicalColumns();
946 throw RException(
R__FAIL(
"ROOT bug: unexpected type extra info in UpdateExtraTypeInfo()"));
958 fieldZero.SetOnDiskId(0);
960 projectedFields.GetFieldZero().SetOnDiskId(0);
963 initialChangeset.
fAddedFields.reserve(fieldZero.GetMutableSubfields().size());
964 for (
auto f : fieldZero.GetMutableSubfields())
966 initialChangeset.
fAddedProjectedFields.reserve(projectedFields.GetFieldZero().GetMutableSubfields().size());
967 for (
auto f : projectedFields.GetFieldZero().GetMutableSubfields())
979std::unique_ptr<ROOT::RNTupleModel>
987 const auto nColumns = descriptor.GetNPhysicalColumns();
992 const auto &column = descriptor.GetColumnDescriptor(i);
1012 if (!cluster.ContainsColumn(i))
1014 const auto &columnRange = cluster.GetColumnRange(i);
1015 R__ASSERT(columnRange.GetPhysicalColumnId() == i);
1028 modelOpts.SetReconstructProjections(
true);
1030 modelOpts.SetEmulateUnknownTypes(
true);
1031 auto model = descriptor.CreateModel(modelOpts);
1032 if (!copyClusters) {
1034 projectedFields.GetFieldZero().SetOnDiskId(model->GetConstFieldZero().GetOnDiskId());
1076 fOpenPageRanges.at(physicalColumnId).GetPageInfos().emplace_back(pageInfo);
1079std::vector<ROOT::RNTupleLocator>
1081 const std::vector<bool> &mask)
1083 std::vector<ROOT::RNTupleLocator> locators;
1084 locators.reserve(mask.size());
1086 for (
auto &range : ranges) {
1087 for (
auto sealedPageIt = range.fFirst; sealedPageIt != range.fLast; ++sealedPageIt) {
1092 locators.shrink_to_fit();
1099 struct RSealedPageLink {
1101 std::size_t fLocatorIdx = 0;
1104 std::vector<bool> mask;
1106 std::vector<std::size_t> locatorIndexes;
1108 std::unordered_map<std::uint64_t, RSealedPageLink> originalPages;
1109 std::size_t iLocator = 0;
1110 for (
auto &range : ranges) {
1111 const auto rangeSize = std::distance(range.fFirst, range.fLast);
1112 mask.reserve(mask.size() + rangeSize);
1113 locatorIndexes.reserve(locatorIndexes.size() + rangeSize);
1115 for (
auto sealedPageIt = range.fFirst; sealedPageIt != range.fLast; ++sealedPageIt) {
1117 mask.emplace_back(
true);
1118 locatorIndexes.emplace_back(iLocator++);
1122 R__ASSERT(sealedPageIt->GetHasChecksum());
1124 const auto chk = sealedPageIt->GetChecksum().Unwrap();
1125 auto itr = originalPages.find(chk);
1126 if (itr == originalPages.end()) {
1127 originalPages.insert({chk, {&(*sealedPageIt), iLocator}});
1128 mask.emplace_back(
true);
1129 locatorIndexes.emplace_back(iLocator++);
1133 const auto *p = itr->second.fSealedPage;
1134 if (sealedPageIt->GetDataSize() != p->GetDataSize() ||
1135 memcmp(sealedPageIt->GetBuffer(), p->GetBuffer(), p->GetDataSize())) {
1136 mask.emplace_back(
true);
1137 locatorIndexes.emplace_back(iLocator++);
1141 mask.emplace_back(
false);
1142 locatorIndexes.emplace_back(itr->second.fLocatorIdx);
1145 mask.shrink_to_fit();
1146 locatorIndexes.shrink_to_fit();
1152 for (
auto &range : ranges) {
1153 for (
auto sealedPageIt = range.fFirst; sealedPageIt != range.fLast; ++sealedPageIt) {
1154 fOpenColumnRanges.at(range.fPhysicalColumnId).IncrementNElements(sealedPageIt->GetNElements());
1158 pageInfo.
SetLocator(locators[locatorIndexes[i++]]);
1160 fOpenPageRanges.at(range.fPhysicalColumnId).GetPageInfos().emplace_back(pageInfo);
1173 RStagedCluster::RColumnInfo columnInfo;
1174 columnInfo.fCompressionSettings =
fOpenColumnRanges[i].GetCompressionSettings().value();
1177 columnInfo.fPageRange.SetPhysicalColumnId(i);
1178 columnInfo.fIsSuppressed =
true;
1189 stagedCluster.
fColumnInfos.push_back(std::move(columnInfo));
1192 return stagedCluster;
1197 for (
const auto &cluster : clusters) {
1202 for (
const auto &columnInfo : cluster.fColumnInfos) {
1203 const auto colId = columnInfo.fPageRange.GetPhysicalColumnId();
1204 if (columnInfo.fIsSuppressed) {
1205 assert(columnInfo.fPageRange.GetPageInfos().empty());
1209 columnInfo.fCompressionSettings, columnInfo.fPageRange);
1215 for (
const auto &columnInfo : cluster.fColumnInfos) {
1216 if (!columnInfo.fIsSuppressed)
1218 const auto colId = columnInfo.fPageRange.GetPhysicalColumnId();
1222 const auto &columnRangeFromDesc = clusterBuilder.
GetColumnRange(colId);
1223 fOpenColumnRanges[colId].SetFirstElementIndex(columnRangeFromDesc.GetFirstElementIndex() +
1224 columnRangeFromDesc.GetNElements());
1236 const auto nClusters = descriptor.GetNActiveClusters();
1237 std::vector<ROOT::DescriptorId_t> physClusterIDs;
1238 physClusterIDs.reserve(nClusters);
1248 const auto clusterGroupId = descriptor.GetNClusterGroups();
1256 const auto &lastClusterDesc = descriptor.GetClusterDescriptor(nClusters - 1);
1257 cgBuilder.
MinEntry(firstClusterDesc.GetFirstEntryIndex())
1258 .
EntrySpan(lastClusterDesc.GetFirstEntryIndex() + lastClusterDesc.GetNEntries() -
1259 firstClusterDesc.GetFirstEntryIndex())
1262 std::vector<ROOT::DescriptorId_t> clusterIds;
1263 clusterIds.reserve(nClusters);
1265 clusterIds.emplace_back(i);
1294 for (
const auto &etDesc :
fDescriptorBuilder.GetDescriptor().GetExtraTypeInfoIterable()) {
1298 R__ASSERT(etDesc.GetTypeName().empty());
1299 R__ASSERT(etDesc.GetTypeVersion() == 0);
1331 "CPU time spent compressing")});
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking).
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
A collection of Counter objects with a name, a unit, and a description.
RNTupleAttrSetDescriptorBuilder & AnchorLocator(const RNTupleLocator &loc)
RNTupleAttrSetDescriptorBuilder & SchemaVersion(std::uint16_t major, std::uint16_t minor)
RResult< ROOT::Experimental::RNTupleAttrSetDescriptor > MoveDescriptor()
Attempt to make an AttributeSet descriptor.
RNTupleAttrSetDescriptorBuilder & Name(std::string_view name)
RNTupleAttrSetDescriptorBuilder & AnchorLength(std::uint32_t length)
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
const RClusterDescriptor::RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId)
RClusterDescriptorBuilder & ClusterId(ROOT::DescriptorId_t clusterId)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
void AddSortedClusters(const std::vector< ROOT::DescriptorId_t > &clusterIds)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
An in-memory subset of the packed and compressed pages of a cluster.
std::unordered_set< ROOT::DescriptorId_t > ColumnSet_t
const ColumnSet_t & GetAvailPhysicalColumns() const
const ROnDiskPage * GetOnDiskPage(const ROnDiskPage::Key &key) const
ROOT::DescriptorId_t GetId() const
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & SetSuppressedDeferred()
RColumnDescriptorBuilder & LogicalColumnId(ROOT::DescriptorId_t logicalColumnId)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RColumnDescriptorBuilder & BitsOnStorage(std::uint16_t bitsOnStorage)
RColumnDescriptorBuilder & ValueRange(double min, double max)
RColumnDescriptorBuilder & Type(ROOT::ENTupleColumnType type)
RColumnDescriptorBuilder & PhysicalColumnId(ROOT::DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & RepresentationIndex(std::uint16_t representationIndex)
A column element encapsulates the translation between basic C++ types and their column representation...
virtual bool IsMappable() const
Derived, typed classes tell whether the on-storage layout is bitwise identical to the memory layout.
virtual void Unpack(void *destination, const void *source, std::size_t count) const
If the on-storage layout and the in-memory layout differ, unpacking creates a memory page from an on-...
std::size_t GetSize() const
virtual RIdentifier GetIdentifier() const =0
virtual void Pack(void *destination, const void *source, std::size_t count) const
If the on-storage layout and the in-memory layout differ, packing creates an on-disk page from an in-...
std::size_t GetPackedSize(std::size_t nElements=1U) const
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
std::optional< std::pair< double, double > > GetValueRange() const
std::uint16_t GetRepresentationIndex() const
ROOT::Internal::RColumnElementBase * GetElement() const
ROOT::ENTupleColumnType GetType() const
ROOT::NTupleSize_t GetFirstElementIndex() const
std::size_t GetWritePageCapacity() const
ROOT::DescriptorId_t GetOnDiskId() const
std::uint16_t GetBitsOnStorage() const
std::uint32_t GetIndex() const
A helper class for piece-wise construction of an RFieldDescriptor.
static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live RNTuple field.
static std::size_t Zip(const void *from, std::size_t nbytes, int compression, void *to)
Returns the size of the compressed data, written into the provided output buffer.
static void Unzip(const void *from, size_t nbytes, size_t dataLen, void *to)
The nbytes parameter provides the size of the from buffer.
static unsigned int GetClusterBunchSize(const RNTupleReadOptions &options)
A helper class for serializing and deserialization of the RNTuple binary format.
static std::uint32_t SerializeXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3, void *buffer)
Writes a XxHash-3 64bit checksum of the byte range given by data and length.
static RResult< StreamerInfoMap_t > DeserializeStreamerInfos(const std::string &extraTypeInfoContent)
static RResult< void > VerifyXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3)
Expects an xxhash3 checksum in the 8 bytes following data + length and verifies it.
EDescriptorDeserializeMode
static RResult< std::uint32_t > SerializePageList(void *buffer, const RNTupleDescriptor &desc, std::span< ROOT::DescriptorId_t > physClusterIDs, const RContext &context)
static RResult< std::uint32_t > SerializeFooter(void *buffer, const RNTupleDescriptor &desc, const RContext &context)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static RResult< RContext > SerializeHeader(void *buffer, const RNTupleDescriptor &desc)
static std::string SerializeStreamerInfos(const StreamerInfoMap_t &infos)
A memory region that contains packed and compressed pages.
void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage)
Inserts information about a page stored in fMemory.
A page as being stored on disk, that is packed and compressed.
Uses standard C++ memory allocation for the column data pages.
Abstract interface to allocate and release pages.
virtual RPage NewPage(std::size_t elementSize, std::size_t nElements)=0
Reserves memory large enough to hold nElements of the given size.
RStagedCluster StageCluster(ROOT::NTupleSize_t nNewEntries) final
Stage the current cluster and create a new one for the following data.
void UpdateSchema(const ROOT::Internal::RNTupleModelChangeset &changeset, ROOT::NTupleSize_t firstEntry) override
Incorporate incremental changes to the model into the ntuple descriptor.
void CommitSealedPage(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
Write a preprocessed page to storage. The column must have been added before.
std::uint64_t fNextClusterInGroup
Remembers the starting cluster id for the next cluster group.
std::unique_ptr< RNTupleModel > InitFromDescriptor(const ROOT::RNTupleDescriptor &descriptor, bool copyClusters)
Initialize sink based on an existing descriptor and fill into the descriptor builder,...
void UpdateExtraTypeInfo(const ROOT::RExtraTypeInfoDescriptor &extraTypeInfo) final
Adds an extra type information record to schema.
void CommitAttributeSet(std::string_view attrSetName, const RNTupleLink &attrAnchorInfo) final
Adds the given anchor information (name + locator) into the main RNTuple's descriptor as an attribute...
ColumnHandle_t AddColumn(ROOT::DescriptorId_t fieldId, ROOT::Internal::RColumn &column) final
Register a new column.
ROOT::Internal::RNTupleDescriptorBuilder fDescriptorBuilder
virtual std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges, const std::vector< bool > &mask)
Vector commit of preprocessed pages.
RPagePersistentSink(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
void CommitSuppressedColumn(ColumnHandle_t columnHandle) final
Commits a suppressed column for the current cluster.
ROOT::Internal::RNTupleSerializer::RContext fSerializationContext
Used to map the IDs of the descriptor to the physical IDs issued during header/footer serialization.
ROOT::NTupleSize_t fPrevClusterNEntries
Used to calculate the number of entries in the current cluster.
virtual RNTupleLink CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length)=0
virtual RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length)=0
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
void CommitStagedClusters(std::span< RStagedCluster > clusters) final
Commit staged clusters, logically appending them to the ntuple descriptor.
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Guess the concrete derived page sink from the location.
std::vector< ROOT::RClusterDescriptor::RPageRange > fOpenPageRanges
Keeps track of the written pages in the currently open cluster. Indexed by column id.
void CommitPage(ColumnHandle_t columnHandle, const ROOT::Internal::RPage &page) final
Write a page to the storage. The column must have been added before.
RNTupleLink CommitDatasetImpl() final
~RPagePersistentSink() override
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
std::vector< ROOT::RClusterDescriptor::RColumnRange > fOpenColumnRanges
Keeps track of the number of elements in the currently open cluster. Indexed by column id.
std::unique_ptr< RCounters > fCounters
void CommitClusterGroup() final
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
virtual RNTupleLocator CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage)=0
void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges) final
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t fInfosOfStreamerFields
Union of the streamer info records that are sent from streamer fields to the sink before committing t...
virtual RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const ROOT::Internal::RPage &page)=0
virtual std::uint64_t StageClusterImpl()=0
Returns the number of bytes written to storage (excluding metadata).
virtual RNTupleLink CommitDatasetImpl()=0
RNTupleLink CommitDataset()
Run the registered callbacks and finalize the current cluster and the entire data set.
std::vector< unsigned char > fSealPageBuffer
Used as destination buffer in the simple SealPage overload.
std::unique_ptr< ROOT::RNTupleWriteOptions > fOptions
std::vector< Callback_t > fOnDatasetCommitCallbacks
virtual ROOT::Internal::RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)
Get a new, empty page for the given column that can be filled with up to nElements; nElements must be...
const ROOT::RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
RSealedPage SealPage(const ROOT::Internal::RPage &page, const ROOT::Internal::RColumnElementBase &element)
Helper for streaming a page.
bool fIsInitialized
Flag if sink was initialized.
RPageSink(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
RWritePageMemoryManager fWritePageMemoryManager
Used in ReservePage to maintain the page buffer budget.
std::unordered_map< ROOT::DescriptorId_t, std::vector< RColumnInfo > > fColumnInfos
Maps physical column IDs to all the requested in-memory representations.
void Insert(ROOT::DescriptorId_t physicalColumnId, ROOT::Internal::RColumnElementBase::RIdentifier elementId)
ROOT::Internal::RCluster::ColumnSet_t ToColumnSet() const
void Erase(ROOT::DescriptorId_t physicalColumnId, ROOT::Internal::RColumnElementBase::RIdentifier elementId)
ROOT::Internal::RClusterPool fClusterPool
The cluster pool asynchronously preloads the next few clusters.
void LoadStructure()
Loads header and footer without decompressing or deserializing them.
virtual ROOT::Internal::RPageRef LoadPage(ColumnHandle_t columnHandle, ROOT::NTupleSize_t globalIndex)
Allocates and fills a page that contains the index-th element.
ROOT::DescriptorId_t fLastUsedCluster
Remembers the last cluster id from which a page was requested.
RExclDescriptorGuard GetExclDescriptorGuard()
Note that the underlying lock is not recursive. See GetSharedDescriptorGuard() for further informatio...
ROOT::RNTupleReadOptions fOptions
void RegisterStreamerInfos()
Builds the streamer info records from the descriptor's extra type info section.
void Attach(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode=ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode::kForReading)
Open the physical storage container and deserialize header and footer.
ColumnHandle_t AddColumn(ROOT::DescriptorId_t fieldId, ROOT::Internal::RColumn &column) override
Register a new column.
void UnzipCluster(ROOT::Internal::RCluster *cluster)
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
const RSharedDescriptorGuard GetSharedDescriptorGuard() const
Takes the read lock for the descriptor.
std::map< ROOT::NTupleSize_t, ROOT::DescriptorId_t > fPreloadedClusters
Clusters from where pages got preloaded in UnzipClusterImpl(), ordered by first entry number of the c...
void PrepareLoadCluster(const ROOT::Internal::RCluster::RKey &clusterKey, ROOT::Internal::ROnDiskPageMap &pageZeroMap, std::function< void(ROOT::DescriptorId_t, ROOT::NTupleSize_t, const ROOT::RClusterDescriptor::RPageInfo &)> perPageFunc)
Prepare a page range read for the column set in clusterKey.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
bool fHasStreamerInfosRegistered
Set to true when RegisterStreamerInfos() is called.
ROOT::NTupleSize_t GetNEntries()
REntryRange fEntryRange
Used by the cluster pool to prevent reading beyond the given range.
void UpdateLastUsedCluster(ROOT::DescriptorId_t clusterId)
Does nothing if fLastUsedCluster == clusterId.
virtual ROOT::Internal::RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster)=0
virtual std::unique_ptr< RPageSource > CloneImpl() const =0
Returns a new, unattached page source for the same data set.
std::unique_ptr< RCounters > fCounters
std::unordered_set< ROOT::DescriptorId_t > fPinnedClusters
Pinned clusters and their $2 * (cluster bunch size) - 1$ successors will not be evicted from the clus...
ROOT::NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
virtual void LoadStructureImpl()=0
ROOT::RNTupleDescriptor fDescriptor
ROOT::Internal::RPagePool fPagePool
Pages that are unzipped with IMT are staged into the page pool.
void DropColumn(ColumnHandle_t columnHandle) override
Unregisters a column.
virtual ROOT::RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode)=0
LoadStructureImpl() has been called before AttachImpl() is called
virtual void UnzipClusterImpl(ROOT::Internal::RCluster *cluster)
RPageSource(std::string_view ntupleName, const ROOT::RNTupleReadOptions &fOptions)
void SetEntryRange(const REntryRange &range)
Promise to only read from the given entry range.
std::unique_ptr< RPageSource > Clone() const
Open the same storage multiple times, e.g.
RActivePhysicalColumns fActivePhysicalColumns
The active columns are implicitly defined by the model fields or views.
bool fIsAttached
Set to true once Attach() is called.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Guess the concrete derived page source from the file name (location).
static RResult< ROOT::Internal::RPage > UnsealPage(const RSealedPage &sealedPage, const ROOT::Internal::RColumnElementBase &element, ROOT::Internal::RPageAllocator &pageAlloc)
Helper for unstreaming a page.
bool fHasStructure
Set to true once LoadStructure() is called.
RTaskScheduler * fTaskScheduler
std::unique_ptr< ROOT::Internal::RPageAllocator > fPageAllocator
For the time being, we will use the heap allocator for all sources and sinks. This may change in the ...
static constexpr std::size_t kNBytesPageChecksum
The page checksum is a 64bit xxhash3.
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
ROOT::Experimental::Detail::RNTupleMetrics fMetrics
RPageStorage(std::string_view name)
Stores information about the cluster in which this page resides.
A page is a slice of a column that is mapped into memory.
std::uint32_t GetNElements() const
std::size_t GetNBytes() const
The space taken by column elements in the buffer.
void * GrowUnchecked(std::uint32_t nElements)
Increases the number elements in the page.
static const void * GetPageZeroBuffer()
Return a pointer to the page zero buffer used if there is no on-disk data for a particular deferred c...
const ROOT::RFieldBase * GetSourceField(const ROOT::RFieldBase *target) const
std::size_t fCurrentAllocatedBytes
Sum of all the write page sizes (their capacity) of the columns in fColumnsSortedByPageSize.
std::size_t fMaxAllocatedBytes
Maximum allowed value for fCurrentAllocatedBytes, set from RNTupleWriteOptions::fPageBufferBudget.
bool TryEvict(std::size_t targetAvailableSize, std::size_t pageSizeLimit)
Flush columns in order of allocated write page size until the sum of all write page allocations leave...
bool TryUpdate(ROOT::Internal::RColumn &column, std::size_t newWritePageSize)
Try to register the new write page size for the given column.
std::set< RColumnInfo, std::greater< RColumnInfo > > fColumnsSortedByPageSize
All columns that called ReservePage() (hence TryUpdate()) at least once, sorted by their current writ...
The window of element indexes of a particular column in a particular cluster.
void SetCompressionSettings(std::optional< std::uint32_t > comp)
void SetPhysicalColumnId(ROOT::DescriptorId_t id)
void SetFirstElementIndex(ROOT::NTupleSize_t idx)
void SetNElements(ROOT::NTupleSize_t n)
Metadata for RNTuple clusters.
ROOT::NTupleSize_t GetNEntries() const
ROOT::NTupleSize_t GetFirstEntryIndex() const
Base class for all ROOT issued exceptions.
A field translates read and write calls from/to underlying columns to/from tree values.
ROOT::DescriptorId_t GetOnDiskId() const
The on-storage metadata of an RNTuple.
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
const RClusterDescriptor & GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
ROOT::NTupleSize_t GetIndexInCluster() const
ROOT::DescriptorId_t GetClusterId() const
ELocatorType GetType() const
For non-disk locators, the value for the Type field.
The RNTupleModel encapsulates the schema of an RNTuple.
Common user-tunable settings for reading RNTuples.
Common user-tunable settings for storing RNTuples.
void ThrowOnError()
Short-hand method to throw an exception in the case of errors.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
RNTupleTimer< RNTupleAtomicCounter, RNTupleTickCounter< RNTupleAtomicCounter > > RNTupleAtomicTimer
const std::uint16_t kSchemaVersionMajor
const std::uint16_t kSchemaVersionMinor
ROOT::RFieldZero & GetFieldZeroOfModel(RNTupleModel &model)
RResult< void > EnsureValidNameForRNTuple(std::string_view name, std::string_view where)
Check whether a given string is a valid name according to the RNTuple specification.
std::unique_ptr< T[]> MakeUninitArray(std::size_t size)
Make an array of default-initialized elements.
RProjectedFields & GetProjectedFieldsOfModel(RNTupleModel &model)
std::unique_ptr< RColumnElementBase > GenerateColumnElement(std::type_index inMemoryType, ROOT::ENTupleColumnType onDiskType)
void CallConnectPageSinkOnField(RFieldBase &, ROOT::Internal::RPageSink &, ROOT::NTupleSize_t firstEntry=0)
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
The identifiers that specify the content of a (partial) cluster.
ROOT::DescriptorId_t fClusterId
ColumnSet_t fPhysicalColumnSet
Every concrete RColumnElement type is identified by its on-disk type (column type) and the in-memory ...
std::uint32_t fLength
Uncompressed length of the anchor, including the checksum.
The incremental changes to an RNTupleModel.
std::vector< ROOT::RFieldBase * > fAddedProjectedFields
Points to the projected fields in fModel that were added as part of an updater transaction.
std::vector< ROOT::RFieldBase * > fAddedFields
Points to the fields in fModel that were added as part of an updater transaction.
On-disk pages within a page source are identified by the column and page number.
Default I/O performance counters that get registered in fMetrics.
Parameters for the SealPage() method.
bool fWriteChecksum
Adds an 8-byte little-endian xxhash3 checksum to the page payload.
std::uint32_t fCompressionSettings
Compression algorithm and level to apply.
void * fBuffer
Location for sealed output. The memory buffer has to be large enough.
const ROOT::Internal::RPage * fPage
Input page to be sealed.
bool fAllowAlias
If false, the output buffer must not point to the input page buffer, which would otherwise be an opti...
const ROOT::Internal::RColumnElementBase * fElement
Corresponds to the page's elements, for size calculation etc.
Cluster that was staged, but not yet logically appended to the RNTuple.
ROOT::NTupleSize_t fNEntries
std::uint64_t fNBytesWritten
std::vector< RColumnInfo > fColumnInfos
Summarizes cluster-level information that is necessary to load a certain page.
std::uint64_t fColumnOffset
The first element number of the page's column in the given cluster.
ROOT::DescriptorId_t fClusterId
ROOT::RClusterDescriptor::RPageInfoExtended fPageInfo
Location of the page on disk.
Default I/O performance counters that get registered in fMetrics.
Used in SetEntryRange / GetEntryRange.
ROOT::NTupleSize_t fNEntries
bool IntersectsWith(const ROOT::RClusterDescriptor &clusterDesc) const
Returns true if the given cluster has entries within the entry range.
ROOT::NTupleSize_t fFirstEntry
ROOT::Internal::RColumn * fColumn
ROOT::DescriptorId_t fPhysicalId
A sealed page contains the bytes of a page as written to storage (packed & compressed).
RResult< void > VerifyChecksumIfEnabled() const
void SetBuffer(const void *buffer)
bool fHasChecksum
If set, the last 8 bytes of the buffer are the xxhash of the rest of the buffer.
std::uint32_t GetNElements() const
void SetHasChecksum(bool hasChecksum)
const void * GetBuffer() const
void SetNElements(std::uint32_t nElements)
bool GetHasChecksum() const
RResult< std::uint64_t > GetChecksum() const
Returns a failure if the sealed page has no checksum.
std::size_t GetBufferSize() const
void SetBufferSize(std::size_t bufferSize)
std::size_t fBufferSize
Size of the page payload and the trailing checksum (if available).
std::size_t GetDataSize() const
ROOT::Internal::RColumn * fColumn
std::size_t fCurrentPageSize
std::size_t fInitialPageSize
bool operator>(const RColumnInfo &other) const
Information about a single page in the context of a cluster's page range.
void SetLocator(const RNTupleLocator &locator)
const RNTupleLocator & GetLocator() const
void SetHasChecksum(bool hasChecksum)
void SetNElements(std::uint32_t n)
Modifiers passed to CreateModel().