Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorage.cxx
Go to the documentation of this file.
1/// \file RPageStorage.cxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \date 2018-10-04
4
5/*************************************************************************
6 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12
13#include <ROOT/RPageStorage.hxx>
15#include <ROOT/RColumn.hxx>
16#include <ROOT/RFieldBase.hxx>
20#include <ROOT/RNTupleModel.hxx>
22#include <ROOT/RNTupleUtils.hxx>
23#include <ROOT/RNTupleZip.hxx>
25#include <ROOT/RPageSinkBuf.hxx>
27#ifdef R__ENABLE_DAOS
29#endif
30
31#include <Compression.h>
32#include <TError.h>
33
34#include <algorithm>
35#include <atomic>
36#include <cassert>
37#include <cstring>
38#include <functional>
39#include <memory>
40#include <string_view>
41#include <unordered_map>
42#include <utility>
43
53
55
59
65
67 : fMetrics(""), fPageAllocator(std::make_unique<ROOT::Internal::RPageAllocatorHeap>()), fNTupleName(name)
68{
69}
70
72
74{
75 if (!fHasChecksum)
76 return;
77
78 auto charBuf = reinterpret_cast<const unsigned char *>(fBuffer);
79 auto checksumBuf = const_cast<unsigned char *>(charBuf) + GetDataSize();
80 std::uint64_t xxhash3;
82}
83
85{
86 if (!fHasChecksum)
88
89 auto success = RNTupleSerializer::VerifyXxHash3(reinterpret_cast<const unsigned char *>(fBuffer), GetDataSize());
90 if (!success)
91 return R__FAIL("page checksum verification failed, data corruption detected");
93}
94
96{
97 if (!fHasChecksum)
98 return R__FAIL("invalid attempt to extract non-existing page checksum");
99
100 assert(fBufferSize >= kNBytesPageChecksum);
101 std::uint64_t checksum;
103 reinterpret_cast<const unsigned char *>(fBuffer) + fBufferSize - kNBytesPageChecksum, checksum);
104 return checksum;
105}
106
107//------------------------------------------------------------------------------
108
// Body of the routine that registers one more user of the (physicalColumnId, elementId) pair in fColumnInfos.
// NOTE(review): the signature line is missing from this listing; presumably this is the `Insert` that is
// invoked on fActivePhysicalColumns elsewhere in this file -- confirm.
111{
// Assuming fColumnInfos is a std::map-like container, emplace() keeps an already existing entry, so `itr`
// refers to the column's list whether or not it was just created -- confirm the container type.
112 auto [itr, _] = fColumnInfos.emplace(physicalColumnId, std::vector<RColumnInfo>());
113 for (auto &columnInfo : itr->second) {
114 if (columnInfo.fElementId == elementId) {
// The element is already registered for this column: only bump its reference counter.
115 columnInfo.fRefCounter++;
116 return;
117 }
118 }
// First registration of this element for the column: the reference counter starts at 1.
119 itr->second.emplace_back(RColumnInfo{elementId, 1});
120}
121
// Body of the routine that releases one reference to the (physicalColumnId, elementId) pair in fColumnInfos.
// NOTE(review): the signature line is missing from this listing; presumably this is the `Erase` that is
// invoked on fActivePhysicalColumns elsewhere in this file -- confirm.
124{
125 auto itr = fColumnInfos.find(physicalColumnId);
// An unknown column id trips the assertion below.
126 R__ASSERT(itr != fColumnInfos.end());
127 for (std::size_t i = 0; i < itr->second.size(); ++i) {
128 if (itr->second[i].fElementId != elementId)
129 continue;
130
131 itr->second[i].fRefCounter--;
132 if (itr->second[i].fRefCounter == 0) {
// Last reference dropped: remove the element entry and, if the column has no entries left, the column itself.
133 itr->second.erase(itr->second.begin() + i);
134 if (itr->second.empty()) {
135 fColumnInfos.erase(itr);
136 }
137 }
// Only the first matching entry is handled; leaving the loop here also avoids touching `itr` after it
// may have been erased just above.
138 break;
139 }
// If no entry matches elementId, nothing is changed.
140}
141
149
// Tells whether the entries of clusterDesc overlap the range [fFirstEntry, fFirstEntry + fNEntries).
// Returns true on overlap, when the range is unset, or when the cluster has no entries; false otherwise.
// NOTE(review): the signature line is missing from this listing.
151{
152 if (fFirstEntry == ROOT::kInvalidNTupleIndex) {
153 /// Entry range unset, we assume that the entry range covers the complete source
154 return true;
155 }
156
// A cluster without entries is always reported as intersecting.
157 if (clusterDesc.GetNEntries() == 0)
158 return true;
// The cluster ends at or before the first entry of the range.
159 if ((clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries()) <= fFirstEntry)
160 return false;
// The cluster starts at or after the end of the range.
161 if (clusterDesc.GetFirstEntryIndex() >= (fFirstEntry + fNEntries))
162 return false;
163 return true;
164}
165
168 fClusterPool(*this, ROOT::Internal::RNTupleReadOptionsManip::GetClusterBunchSize(options)),
169 fPagePool(*this),
170 fOptions(options)
171{
172}
173
175
176std::unique_ptr<ROOT::Internal::RPageSource>
177ROOT::Internal::RPageSource::Create(std::string_view ntupleName, std::string_view location,
178 const ROOT::RNTupleReadOptions &options)
179{
180 if (ntupleName.empty()) {
181 throw RException(R__FAIL("empty RNTuple name"));
182 }
183 if (location.empty()) {
184 throw RException(R__FAIL("empty storage location"));
185 }
186 if (location.find("daos://") == 0)
187#ifdef R__ENABLE_DAOS
188 return std::make_unique<ROOT::Experimental::Internal::RPageSourceDaos>(ntupleName, location, options);
189#else
190 throw RException(R__FAIL("This RNTuple build does not support DAOS."));
191#endif
192
193 return std::make_unique<ROOT::Internal::RPageSourceFile>(ntupleName, location, options);
194}
195
198{
200 auto physicalId =
201 GetSharedDescriptorGuard()->FindPhysicalColumnId(fieldId, column.GetIndex(), column.GetRepresentationIndex());
203 fActivePhysicalColumns.Insert(physicalId, column.GetElement()->GetIdentifier());
204 return ColumnHandle_t{physicalId, &column};
205}
206
208{
209 fActivePhysicalColumns.Erase(columnHandle.fPhysicalId, columnHandle.fColumn->GetElement()->GetIdentifier());
210}
211
// Stores `range` in fEntryRange after checking that it does not extend past the number of entries.
// Throws RException ("invalid entry range") if range.fFirstEntry + range.fNEntries exceeds GetNEntries().
// NOTE(review): the signature line is missing from this listing.
213{
// NOTE(review): if both fields are unsigned integers, a very large fNEntries could wrap this sum around and
// slip through the check -- confirm the field types.
214 if ((range.fFirstEntry + range.fNEntries) > GetNEntries()) {
215 throw RException(R__FAIL("invalid entry range"));
216 }
217 fEntryRange = range;
218}
219
// Calls LoadStructureImpl() unless fHasStructure is already set, then sets fHasStructure.
// Repeated calls therefore invoke LoadStructureImpl() at most once (unless it throws before the flag is set).
221{
222 if (!fHasStructure)
223 LoadStructureImpl();
224 fHasStructure = true;
225}
226
// Makes sure the structure is loaded and, unless fIsAttached is already set, moves the result of
// AttachImpl(mode) into the object returned by GetExclDescriptorGuard(). Sets fIsAttached afterwards.
// NOTE(review): the signature line is missing from this listing.
228{
229 LoadStructure();
230 if (!fIsAttached)
231 GetExclDescriptorGuard().MoveIn(AttachImpl(mode));
232 fIsAttached = true;
233}
234
235std::unique_ptr<ROOT::Internal::RPageSource> ROOT::Internal::RPageSource::Clone() const
236{
237 auto clone = CloneImpl();
238 if (fIsAttached) {
239 clone->GetExclDescriptorGuard().MoveIn(GetSharedDescriptorGuard()->Clone());
240 clone->fHasStructure = true;
241 clone->fIsAttached = true;
242 }
243 return clone;
244}
245
// Returns the number of entries reported by the descriptor, accessed through the shared descriptor guard.
// NOTE(review): the signature line is missing from this listing.
247{
248 return GetSharedDescriptorGuard()->GetNEntries();
249}
250
252{
253 return GetSharedDescriptorGuard()->GetNElements(columnHandle.fPhysicalId);
254}
255
// Forwards `cluster` to UnzipClusterImpl(), but only when fTaskScheduler is set; otherwise does nothing.
// NOTE(review): the signature line is missing from this listing.
257{
258 if (fTaskScheduler)
259 UnzipClusterImpl(cluster);
260}
261
// For every active column that is available in `cluster`, creates one task per page that unseals the on-disk
// page and preloads the result into fPagePool; the tasks are handed to fTaskScheduler.
// NOTE(review): several lines are missing from this listing (the signature, the declaration of `sealedPage`,
// parts of the task lambda and the condition guarding the final throw); the comments below only describe
// what is visible.
263{
264 RNTupleAtomicTimer timer(fCounters->fTimeWallUnzip, fCounters->fTimeCpuUnzip);
265
266 const auto clusterId = cluster->GetId();
267 auto descriptorGuard = GetSharedDescriptorGuard();
268 const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId);
269
// Record the cluster under its first entry index.
270 fPreloadedClusters[clusterDescriptor.GetFirstEntryIndex()] = clusterId;
271
// Presumably set by a task when unsealing fails; the assignment is not visible in this listing.
272 std::atomic<bool> foundChecksumFailure{false};
273
// Owns the column elements whose raw pointers are captured by the tasks created below.
274 std::vector<std::unique_ptr<RColumnElementBase>> allElements;
275 const auto &columnsInCluster = cluster->GetAvailPhysicalColumns();
276 for (const auto columnId : columnsInCluster) {
277 // By the time we unzip a cluster, the set of active columns may have already changed with respect to the
278 // moment when we requested reading the cluster. That doesn't matter much, we simply decompress what is now
279 // in the list of active columns.
280 if (!fActivePhysicalColumns.HasColumnInfos(columnId))
281 continue;
282 const auto &columnInfos = fActivePhysicalColumns.GetColumnInfos(columnId);
283
284 allElements.reserve(allElements.size() + columnInfos.size());
285 for (const auto &info : columnInfos) {
286 allElements.emplace_back(GenerateColumnElement(info.fElementId));
287
288 const auto &pageRange = clusterDescriptor.GetPageRange(columnId);
// pageNo counts the pages of this column; firstInPage accumulates the element counts of the preceding pages.
289 std::uint64_t pageNo = 0;
290 std::uint64_t firstInPage = 0;
291 for (const auto &pi : pageRange.GetPageInfos()) {
292 auto onDiskPage = cluster->GetOnDiskPage(ROnDiskPage::Key{columnId, pageNo});
294 sealedPage.SetNElements(pi.GetNElements());
295 sealedPage.SetHasChecksum(pi.HasChecksum());
// The buffer size is the stored size plus the checksum bytes when the page carries a checksum.
296 sealedPage.SetBufferSize(pi.GetLocator().GetNBytesOnStorage() + pi.HasChecksum() * kNBytesPageChecksum);
// NOTE(review): onDiskPage is dereferenced here, one line before the assertion that checks it for null.
// Other code in this file asserts first and dereferences afterwards; consider swapping these two statements.
297 sealedPage.SetBuffer(onDiskPage->GetAddress());
298 R__ASSERT(onDiskPage && (onDiskPage->GetSize() == sealedPage.GetBufferSize()));
299
// The task captures its inputs by value; `element` is a raw pointer into allElements, which stays alive
// until the end of this function.
300 auto taskFunc = [this, columnId, clusterId, firstInPage, sealedPage, element = allElements.back().get(),
302 indexOffset = clusterDescriptor.GetColumnRange(columnId).GetFirstElementIndex()]() {
303 const ROOT::Internal::RPagePool::RKey keyPagePool{columnId, element->GetIdentifier().fInMemoryType};
304 auto rv = UnsealPage(sealedPage, *element);
305 if (!rv) {
307 return;
308 }
309 auto newPage = rv.Unwrap();
310 fCounters->fSzUnzip.Add(element->GetSize() * sealedPage.GetNElements());
311
312 newPage.SetWindow(indexOffset + firstInPage,
314 fPagePool.PreloadPage(std::move(newPage), keyPagePool);
315 };
316
317 fTaskScheduler->AddTask(taskFunc);
318
319 firstInPage += pi.GetNElements();
320 pageNo++;
321 } // for all pages in column
322
323 fCounters->fNPageUnsealed.Add(pageNo);
324 } // for all in-memory types of the column
325 } // for all columns in cluster
326
// Presumably blocks until all tasks added above have finished -- confirm against the scheduler interface.
327 fTaskScheduler->Wait();
328
330 throw RException(R__FAIL("page checksum verification failed, data corruption detected"));
331 }
332}
333
338{
339 auto descriptorGuard = GetSharedDescriptorGuard();
340 const auto &clusterDesc = descriptorGuard->GetClusterDescriptor(clusterKey.fClusterId);
341
342 for (auto physicalColumnId : clusterKey.fPhysicalColumnSet) {
343 if (clusterDesc.GetColumnRange(physicalColumnId).IsSuppressed())
344 continue;
345
346 const auto &pageRange = clusterDesc.GetPageRange(physicalColumnId);
348 for (const auto &pageInfo : pageRange.GetPageInfos()) {
349 if (pageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) {
352 pageInfo.GetLocator().GetNBytesOnStorage()));
353 } else {
355 }
356 ++pageNo;
357 }
358 }
359}
360
362{
363 if (fLastUsedCluster == clusterId)
364 return;
365
367 GetSharedDescriptorGuard()->GetClusterDescriptor(clusterId).GetFirstEntryIndex();
368 auto itr = fPreloadedClusters.begin();
369 while ((itr != fPreloadedClusters.end()) && (itr->first < firstEntryIndex)) {
370 if (fPinnedClusters.count(itr->second) > 0) {
371 ++itr;
372 } else {
373 fPagePool.Evict(itr->second);
374 itr = fPreloadedClusters.erase(itr);
375 }
376 }
377 std::size_t poolWindow = 0;
378 while ((itr != fPreloadedClusters.end()) &&
380 ++itr;
381 ++poolWindow;
382 }
383 while (itr != fPreloadedClusters.end()) {
384 if (fPinnedClusters.count(itr->second) > 0) {
385 ++itr;
386 } else {
387 fPagePool.Evict(itr->second);
388 itr = fPreloadedClusters.erase(itr);
389 }
390 }
391
392 fLastUsedCluster = clusterId;
393}
394
397{
398 const auto clusterId = localIndex.GetClusterId();
399
401 {
402 auto descriptorGuard = GetSharedDescriptorGuard();
403 const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId);
404 pageInfo = clusterDescriptor.GetPageRange(physicalColumnId).Find(localIndex.GetIndexInCluster());
405 }
406
407 assert(pageInfo.GetLocator().GetType() != RNTupleLocator::kTypePageZero);
408
409 sealedPage.SetBufferSize(pageInfo.GetLocator().GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum);
410 sealedPage.SetNElements(pageInfo.GetNElements());
411 sealedPage.SetHasChecksum(pageInfo.HasChecksum());
412
413 if (!sealedPage.GetBuffer())
414 return;
415
416 LoadSealedPageImpl(pageInfo.GetLocator(), sealedPage);
417 sealedPage.VerifyChecksumIfEnabled().ThrowOnError();
418}
419
// Builds an all-zero in-memory page for a page whose locator is of type kTypePageZero and registers it in
// fPagePool under the key (physical column id, in-memory type of the column element).
// NOTE(review): the signature lines are missing from this listing.
422{
423 const auto &pageInfo = pageSummary.fPageInfo;
424 assert(pageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero);
425
426 const auto element = columnHandle.fColumn->GetElement();
427 const auto elementSize = element->GetSize();
428 const auto elementInMemoryType = element->GetIdentifier().fInMemoryType;
429
430 auto pageZero = fPageAllocator->NewPage(elementSize, pageInfo.GetNElements());
// Grow first so that GetNBytes() in the memset below covers all elements (presumably GetNBytes() reflects
// the number of elements currently in the page -- confirm).
431 pageZero.GrowUnchecked(pageInfo.GetNElements());
432 std::memset(pageZero.GetBuffer(), 0, pageZero.GetNBytes());
// The window starts at the column offset of the cluster plus the page's first element index.
433 pageZero.SetWindow(pageSummary.fColumnOffset + pageInfo.GetFirstElementIndex(),
434 RPage::RClusterInfo(pageSummary.fClusterId, pageSummary.fColumnOffset));
435 return fPagePool.RegisterPage(std::move(pageZero), RPagePool::RKey{columnHandle.fPhysicalId, elementInMemoryType});
436}
437
440{
441 if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown) {
442 throw RException(R__FAIL("tried to read a page with an unknown locator"));
443 } else if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) {
444 return LoadZeroPage(columnHandle, pageSummary);
445 }
446
447 const auto &columnId = columnHandle.fPhysicalId;
448 const auto &clusterId = pageSummary.fClusterId;
449 const auto &pageInfo = pageSummary.fPageInfo;
450
451 const auto element = columnHandle.fColumn->GetElement();
452 const auto elementSize = element->GetSize();
453 const auto elementInMemoryType = element->GetIdentifier().fInMemoryType;
454
455 UpdateLastUsedCluster(clusterId);
456
458 sealedPage.SetNElements(pageInfo.GetNElements());
459 sealedPage.SetHasChecksum(pageInfo.HasChecksum());
460 sealedPage.SetBufferSize(pageInfo.GetLocator().GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum);
461 std::unique_ptr<unsigned char[]> directReadBuffer; // only used if cluster pool is turned off
462
463 if (fOptions.GetClusterCache() == ROOT::RNTupleReadOptions::EClusterCache::kOff) {
465 sealedPage.SetBuffer(directReadBuffer.get());
466 LoadSealedPageImpl(pageInfo.GetLocator(), sealedPage);
467
468 fCounters->fNPageRead.Inc();
469 fCounters->fNRead.Inc();
470 fCounters->fSzReadPayload.Add(sealedPage.GetBufferSize());
471 } else {
472 if (!fCurrentCluster || (fCurrentCluster->GetId() != clusterId) || !fCurrentCluster->ContainsColumn(columnId))
473 fCurrentCluster = fClusterPool.GetCluster(clusterId, fActivePhysicalColumns.ToColumnSet());
474 R__ASSERT(fCurrentCluster->ContainsColumn(columnId));
475
476 // The cluster pool may have unzipped the required page into the page pool
478 RNTupleLocalIndex(clusterId, pageInfo.GetFirstElementIndex()));
479 if (!cachedPageRef.Get().IsNull())
480 return cachedPageRef;
481
482 ROnDiskPage::Key key(columnId, pageInfo.GetPageNumber());
483 auto onDiskPage = fCurrentCluster->GetOnDiskPage(key);
484 R__ASSERT(onDiskPage && (sealedPage.GetBufferSize() == onDiskPage->GetSize()));
485 sealedPage.SetBuffer(onDiskPage->GetAddress());
486 }
487
489 {
490 RNTupleAtomicTimer timer(fCounters->fTimeWallUnzip, fCounters->fTimeCpuUnzip);
491 newPage = UnsealPage(sealedPage, *element).Unwrap();
492 fCounters->fSzUnzip.Add(elementSize * pageInfo.GetNElements());
493 }
494
495 newPage.SetWindow(pageSummary.fColumnOffset + pageInfo.GetFirstElementIndex(),
497 fCounters->fNPageUnsealed.Inc();
498
499 return fPagePool.RegisterPage(std::move(newPage), RPagePool::RKey{columnId, elementInMemoryType});
500}
501
504{
505 const auto columnId = columnHandle.fPhysicalId;
506 const auto columnElementId = columnHandle.fColumn->GetElement()->GetIdentifier();
507 auto cachedPageRef =
508 fPagePool.GetPage(ROOT::Internal::RPagePool::RKey{columnId, columnElementId.fInMemoryType}, globalIndex);
509 if (!cachedPageRef.Get().IsNull()) {
510 UpdateLastUsedCluster(cachedPageRef.Get().GetClusterInfo().GetId());
511 return cachedPageRef;
512 }
513
515 {
516 auto descriptorGuard = GetSharedDescriptorGuard();
517 pageSummary.fClusterId = descriptorGuard->FindClusterId(columnId, globalIndex);
518
519 if (pageSummary.fClusterId == ROOT::kInvalidDescriptorId)
520 throw RException(R__FAIL("entry with index " + std::to_string(globalIndex) + " out of bounds"));
521
522 const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(pageSummary.fClusterId);
523 const auto &columnRange = clusterDescriptor.GetColumnRange(columnId);
524 if (columnRange.IsSuppressed())
526
527 pageSummary.fColumnOffset = columnRange.GetFirstElementIndex();
528 R__ASSERT(pageSummary.fColumnOffset <= globalIndex);
529 pageSummary.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(globalIndex - pageSummary.fColumnOffset);
530 }
531
532 return LoadPageFromSummary(columnHandle, pageSummary);
533}
534
537{
538 const auto clusterId = localIndex.GetClusterId();
539 const auto columnId = columnHandle.fPhysicalId;
540 const auto columnElementId = columnHandle.fColumn->GetElement()->GetIdentifier();
541 auto cachedPageRef =
542 fPagePool.GetPage(ROOT::Internal::RPagePool::RKey{columnId, columnElementId.fInMemoryType}, localIndex);
543 if (!cachedPageRef.Get().IsNull()) {
544 UpdateLastUsedCluster(clusterId);
545 return cachedPageRef;
546 }
547
549 throw RException(R__FAIL("entry out of bounds"));
550
552 {
553 auto descriptorGuard = GetSharedDescriptorGuard();
554 const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId);
555 const auto &columnRange = clusterDescriptor.GetColumnRange(columnId);
556 if (columnRange.IsSuppressed())
558
559 pageSummary.fClusterId = clusterId;
560 pageSummary.fColumnOffset = columnRange.GetFirstElementIndex();
561 pageSummary.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(localIndex.GetIndexInCluster());
562 }
563
564 return LoadPageFromSummary(columnHandle, pageSummary);
565}
566
568{
569 fMetrics = RNTupleMetrics(prefix);
570 fMetrics.ObserveMetrics(fClusterPool.GetMetrics());
571 fMetrics.ObserveMetrics(fPagePool.GetMetrics());
572 fCounters = std::make_unique<RCounters>(RCounters{
573 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("nReadV", "", "number of vector read requests"),
574 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("nRead", "", "number of byte ranges read"),
575 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("szReadPayload", "B", "volume read from storage (required)"),
576 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("szReadOverhead", "B", "volume read from storage (overhead)"),
577 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("szUnzip", "B", "volume after unzipping"),
578 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("nClusterLoaded", "",
579 "number of partial clusters preloaded from storage"),
580 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("nPageRead", "", "number of pages read from storage"),
581 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("nPageUnsealed", "", "number of pages unzipped and decoded"),
582 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("timeWallRead", "ns", "wall clock time spent reading"),
583 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("timeWallUnzip", "ns", "wall clock time spent decompressing"),
584 *fMetrics.MakeCounter<RNTupleTickCounter<RNTupleAtomicCounter> *>("timeCpuRead", "ns", "CPU time spent reading"),
585 *fMetrics.MakeCounter<RNTupleTickCounter<RNTupleAtomicCounter> *>("timeCpuUnzip", "ns",
586 "CPU time spent decompressing"),
587 *fMetrics.MakeCounter<RNTupleCalcPerf *>(
588 "bwRead", "MB/s", "bandwidth compressed bytes read per second", fMetrics,
589 [](const RNTupleMetrics &metrics) -> std::pair<bool, double> {
590 if (const auto szReadPayload = metrics.GetLocalCounter("szReadPayload")) {
591 if (const auto szReadOverhead = metrics.GetLocalCounter("szReadOverhead")) {
592 if (const auto timeWallRead = metrics.GetLocalCounter("timeWallRead")) {
593 if (auto walltime = timeWallRead->GetValueAsInt()) {
594 double payload = szReadPayload->GetValueAsInt();
595 double overhead = szReadOverhead->GetValueAsInt();
596 // unit: bytes / nanosecond = GB/s; the factor 1000 below converts to the reported MB/s
597 return {true, (1000. * (payload + overhead) / walltime)};
598 }
599 }
600 }
601 }
602 return {false, -1.};
603 }),
604 *fMetrics.MakeCounter<RNTupleCalcPerf *>(
605 "bwReadUnzip", "MB/s", "bandwidth uncompressed bytes read per second", fMetrics,
606 [](const RNTupleMetrics &metrics) -> std::pair<bool, double> {
607 if (const auto szUnzip = metrics.GetLocalCounter("szUnzip")) {
608 if (const auto timeWallRead = metrics.GetLocalCounter("timeWallRead")) {
609 if (auto walltime = timeWallRead->GetValueAsInt()) {
610 double unzip = szUnzip->GetValueAsInt();
611 // unit: bytes / nanosecond = GB/s; the factor 1000 below converts to the reported MB/s
612 return {true, 1000. * unzip / walltime};
613 }
614 }
615 }
616 return {false, -1.};
617 }),
618 *fMetrics.MakeCounter<RNTupleCalcPerf *>(
619 "bwUnzip", "MB/s", "decompression bandwidth of uncompressed bytes per second", fMetrics,
620 [](const RNTupleMetrics &metrics) -> std::pair<bool, double> {
621 if (const auto szUnzip = metrics.GetLocalCounter("szUnzip")) {
622 if (const auto timeWallUnzip = metrics.GetLocalCounter("timeWallUnzip")) {
623 if (auto walltime = timeWallUnzip->GetValueAsInt()) {
624 double unzip = szUnzip->GetValueAsInt();
625 // unit: bytes / nanosecond = GB/s; the factor 1000 below converts to the reported MB/s
626 return {true, 1000. * unzip / walltime};
627 }
628 }
629 }
630 return {false, -1.};
631 }),
632 *fMetrics.MakeCounter<RNTupleCalcPerf *>(
633 "rtReadEfficiency", "", "ratio of payload over all bytes read", fMetrics,
634 [](const RNTupleMetrics &metrics) -> std::pair<bool, double> {
635 if (const auto szReadPayload = metrics.GetLocalCounter("szReadPayload")) {
636 if (const auto szReadOverhead = metrics.GetLocalCounter("szReadOverhead")) {
637 if (auto payload = szReadPayload->GetValueAsInt()) {
638 // r/(r+o) = 1/((r+o)/r) = 1/(1 + o/r)
639 return {true, 1. / (1. + (1. * szReadOverhead->GetValueAsInt()) / payload)};
640 }
641 }
642 }
643 return {false, -1.};
644 }),
645 *fMetrics.MakeCounter<RNTupleCalcPerf *>("rtCompression", "", "ratio of compressed bytes / uncompressed bytes",
646 fMetrics, [](const RNTupleMetrics &metrics) -> std::pair<bool, double> {
647 if (const auto szReadPayload =
648 metrics.GetLocalCounter("szReadPayload")) {
649 if (const auto szUnzip = metrics.GetLocalCounter("szUnzip")) {
650 if (auto unzip = szUnzip->GetValueAsInt()) {
651 return {true, (1. * szReadPayload->GetValueAsInt()) / unzip};
652 }
653 }
654 }
655 return {false, -1.};
656 })});
657}
658
// Convenience overload: unseals `sealedPage` for `element` using this object's own page allocator.
// NOTE(review): the signature lines are missing from this listing.
661{
662 return UnsealPage(sealedPage, element, *fPageAllocator);
663}
664
668{
669 // Unsealing a page zero is a no-op. `RPageRange::ExtendToFitColumnRange()` guarantees that the page zero buffer is
670 // large enough to hold `sealedPage.fNElements`
672 auto page = pageAlloc.NewPage(element.GetSize(), sealedPage.GetNElements());
673 page.GrowUnchecked(sealedPage.GetNElements());
674 memset(page.GetBuffer(), 0, page.GetNBytes());
675 return page;
676 }
677
678 auto rv = sealedPage.VerifyChecksumIfEnabled();
679 if (!rv)
680 return R__FORWARD_ERROR(rv);
681
682 const auto bytesPacked = element.GetPackedSize(sealedPage.GetNElements());
683 auto page = pageAlloc.NewPage(element.GetPackedSize(), sealedPage.GetNElements());
684 if (sealedPage.GetDataSize() != bytesPacked) {
686 page.GetBuffer());
687 } else {
688 // We cannot simply map the sealed page as we don't know its life time. Specialized page sources
689 // may decide to implement to not use UnsealPage but to custom mapping / decompression code.
690 // Note that usually pages are compressed.
691 memcpy(page.GetBuffer(), sealedPage.GetBuffer(), bytesPacked);
692 }
693
694 if (!element.IsMappable()) {
695 auto tmp = pageAlloc.NewPage(element.GetSize(), sealedPage.GetNElements());
696 element.Unpack(tmp.GetBuffer(), page.GetBuffer(), sealedPage.GetNElements());
697 page = std::move(tmp);
698 }
699
700 page.GrowUnchecked(sealedPage.GetNElements());
701 return page;
702}
703
705{
706 if (fHasStreamerInfosRegistered)
707 return;
708
709 for (const auto &extraTypeInfo : fDescriptor.GetExtraTypeInfoIterable()) {
711 continue;
712 // We don't need the result, it's enough that during deserialization, BuildCheck() is called for every
713 // streamer info record.
715 }
716
717 fHasStreamerInfosRegistered = true;
718}
719
720//------------------------------------------------------------------------------
721
// Comparison of two column infos: true if this one has the larger fCurrentPageSize; for equal page sizes,
// true if this one's column has the larger on-disk id.
// NOTE(review): the signature line is missing from this listing; presumably this is the comparison used to
// order fColumnsSortedByPageSize -- confirm.
723{
724 // Make the sort order unique by adding the physical on-disk column id as a secondary key
725 if (fCurrentPageSize == other.fCurrentPageSize)
726 return fColumn->GetOnDiskId() > other.fColumn->GetOnDiskId();
727 return fCurrentPageSize > other.fCurrentPageSize;
728}
729
// Tries to get at least targetAvailableSize bytes of free budget (fMaxAllocatedBytes minus
// fCurrentAllocatedBytes) by flushing columns taken from fColumnsSortedByPageSize.
// Only columns whose current page size is above pageSizeLimit and differs from their initial page size are
// flushed. Returns true as soon as enough budget is free, false if the candidates run out first.
// NOTE(review): the signature line is missing from this listing.
731{
732 if (fMaxAllocatedBytes - fCurrentAllocatedBytes >= targetAvailableSize)
733 return true;
734
735 auto itr = fColumnsSortedByPageSize.begin();
736 while (itr != fColumnsSortedByPageSize.end()) {
// Stop at the first entry at or below the limit (presumably the container is ordered by decreasing page
// size, so no later entry can qualify -- confirm against the comparator).
737 if (itr->fCurrentPageSize <= pageSizeLimit)
738 break;
// Columns still at their initial page size are skipped.
739 if (itr->fCurrentPageSize == itr->fInitialPageSize) {
740 ++itr;
741 continue;
742 }
743
744 // Flushing the current column will invalidate itr
745 auto itrFlush = itr++;
746
// Keep a copy of the following entry so that the iteration can be resumed via find() after the flush.
747 RColumnInfo next;
748 if (itr != fColumnsSortedByPageSize.end())
749 next = *itr;
750
751 itrFlush->fColumn->Flush();
752 if (fMaxAllocatedBytes - fCurrentAllocatedBytes >= targetAvailableSize)
753 return true;
754
// A default-constructed `next` (null column) means the flushed entry was the last candidate.
755 if (next.fColumn == nullptr)
756 return false;
757 itr = fColumnsSortedByPageSize.find(next);
758 };
759
760 return false;
761}
762
// Records that `column` asks for a write page of newWritePageSize bytes and adjusts fCurrentAllocatedBytes.
// Returns true if the request was accepted. Returns false, with this column's entry left as it was, if
// TryEvict() could not free the additionally required bytes.
// NOTE(review): the signature line and the declaration of `elem` are missing from this listing.
764{
765 const RColumnInfo key{&column, column.GetWritePageCapacity(), 0};
766 auto itr = fColumnsSortedByPageSize.find(key);
767 if (itr == fColumnsSortedByPageSize.end()) {
// Column not tracked yet: the full page size has to fit into the budget.
768 if (!TryEvict(newWritePageSize, 0))
769 return false;
770 fColumnsSortedByPageSize.insert({&column, newWritePageSize, newWritePageSize});
771 fCurrentAllocatedBytes += newWritePageSize;
772 return true;
773 }
774
776 assert(newWritePageSize >= elem.fInitialPageSize);
777
778 if (newWritePageSize == elem.fCurrentPageSize)
779 return true;
780
// The entry is removed here and re-inserted on every path below, with or without an updated page size.
781 fColumnsSortedByPageSize.erase(itr);
782
783 if (newWritePageSize < elem.fCurrentPageSize) {
784 // Page got smaller
785 fCurrentAllocatedBytes -= elem.fCurrentPageSize - newWritePageSize;
786 elem.fCurrentPageSize = newWritePageSize;
787 fColumnsSortedByPageSize.insert(elem);
788 return true;
789 }
790
791 // Page got larger, we may need to make space available
792 const auto diffBytes = newWritePageSize - elem.fCurrentPageSize;
793 if (!TryEvict(diffBytes, elem.fCurrentPageSize)) {
794 // Don't change anything, let the calling column flush itself
795 // TODO(jblomer): we may consider skipping the column in TryEvict and thus avoiding erase+insert
796 fColumnsSortedByPageSize.insert(elem);
797 return false;
798 }
799 fCurrentAllocatedBytes += diffBytes;
800 elem.fCurrentPageSize = newWritePageSize;
801 fColumnsSortedByPageSize.insert(elem);
802 return true;
803}
804
805//------------------------------------------------------------------------------
806
808 : RPageStorage(name), fOptions(options.Clone()), fWritePageMemoryManager(options.GetPageBufferBudget())
809{
811}
812
814
816{
817 assert(config.fPage);
818 assert(config.fElement);
819 assert(config.fBuffer);
820
821 unsigned char *pageBuf = reinterpret_cast<unsigned char *>(config.fPage->GetBuffer());
822 bool isAdoptedBuffer = true;
823 auto nBytesPacked = config.fPage->GetNBytes();
824 auto nBytesChecksum = config.fWriteChecksum * kNBytesPageChecksum;
825
826 if (!config.fElement->IsMappable()) {
827 nBytesPacked = config.fElement->GetPackedSize(config.fPage->GetNElements());
828 pageBuf = new unsigned char[nBytesPacked];
829 isAdoptedBuffer = false;
830 config.fElement->Pack(pageBuf, config.fPage->GetBuffer(), config.fPage->GetNElements());
831 }
833
834 if ((config.fCompressionSettings != 0) || !config.fElement->IsMappable() || !config.fAllowAlias ||
835 config.fWriteChecksum) {
838 if (!isAdoptedBuffer)
839 delete[] pageBuf;
840 pageBuf = reinterpret_cast<unsigned char *>(config.fBuffer);
841 isAdoptedBuffer = true;
842 }
843
845
847 sealedPage.ChecksumIfEnabled();
848
849 return sealedPage;
850}
851
// Seals `page` for `element` with the compression and checksum settings taken from GetWriteOptions(),
// passing fSealPageBuffer as the working buffer of the seal operation.
// NOTE(review): the signature lines are missing from this listing. Presumably the returned sealed page
// refers to fSealPageBuffer or to the page's own memory (fAllowAlias is set), so it should be consumed
// before the next call -- confirm.
854{
// Size of the page plus the checksum bytes if page checksums are enabled.
855 const auto nBytes = page.GetNBytes() + GetWriteOptions().GetEnablePageChecksums() * kNBytesPageChecksum;
// The buffer only ever grows; it is never shrunk here.
856 if (fSealPageBuffer.size() < nBytes)
857 fSealPageBuffer.resize(nBytes);
858
859 RSealPageConfig config;
860 config.fPage = &page;
861 config.fElement = &element;
862 config.fCompressionSettings = GetWriteOptions().GetCompression();
863 config.fWriteChecksum = GetWriteOptions().GetEnablePageChecksums();
864 config.fAllowAlias = true;
865 config.fBuffer = fSealPageBuffer.data();
866
867 return SealPage(config);
868}
869
// Invokes every callback stored in fOnDatasetCommitCallbacks with *this, in container order, and then
// returns the result of CommitDatasetImpl().
// NOTE(review): the signature line is missing from this listing.
871{
872 for (const auto &cb : fOnDatasetCommitCallbacks)
873 cb(*this);
874 return CommitDatasetImpl();
875}
876
// Requests a page with room for nElements elements of the column's element size.
// Returns a default-constructed RPage if fWritePageMemoryManager.TryUpdate() rejects the requested size,
// otherwise a page obtained from fPageAllocator. nElements must be greater than zero.
// NOTE(review): the signature line is missing from this listing.
878{
879 R__ASSERT(nElements > 0);
880 const auto elementSize = columnHandle.fColumn->GetElement()->GetSize();
881 const auto nBytes = elementSize * nElements;
882 if (!fWritePageMemoryManager.TryUpdate(*columnHandle.fColumn, nBytes))
883 return ROOT::Internal::RPage();
884 return fPageAllocator->NewPage(elementSize, nElements);
885}
886
887//------------------------------------------------------------------------------
888
889std::unique_ptr<ROOT::Internal::RPageSink>
890ROOT::Internal::RPagePersistentSink::Create(std::string_view ntupleName, std::string_view location,
891 const ROOT::RNTupleWriteOptions &options)
892{
893 if (ntupleName.empty()) {
894 throw RException(R__FAIL("empty RNTuple name"));
895 }
896 if (location.empty()) {
897 throw RException(R__FAIL("empty storage location"));
898 }
899 if (location.find("daos://") == 0) {
900#ifdef R__ENABLE_DAOS
901 return std::make_unique<ROOT::Experimental::Internal::RPageSinkDaos>(ntupleName, location, options);
902#else
903 throw RException(R__FAIL("This RNTuple build does not support DAOS."));
904#endif
905 }
906
907 // Otherwise assume that the user wants us to create a file.
908 return std::make_unique<ROOT::Internal::RPageSinkFile>(ntupleName, location, options);
909}
910
912 const ROOT::RNTupleWriteOptions &options)
913 : RPageSink(name, options)
914{
915}
916
918
921{
922 auto columnId = fDescriptorBuilder.GetDescriptor().GetNPhysicalColumns();
924 columnBuilder.LogicalColumnId(columnId)
925 .PhysicalColumnId(columnId)
926 .FieldId(fieldId)
927 .BitsOnStorage(column.GetBitsOnStorage())
928 .ValueRange(column.GetValueRange())
929 .Type(column.GetType())
930 .Index(column.GetIndex())
931 .RepresentationIndex(column.GetRepresentationIndex())
932 .FirstElementIndex(column.GetFirstElementIndex());
933 // For late model extension, we assume that the primary column representation is the active one for the
934 // deferred range. All other representations are suppressed.
935 if (column.GetFirstElementIndex() > 0 && column.GetRepresentationIndex() > 0)
936 columnBuilder.SetSuppressedDeferred();
937 fDescriptorBuilder.AddColumn(columnBuilder.MakeDescriptor().Unwrap());
938 return ColumnHandle_t{columnId, &column};
939}
940
943{
944 if (fIsInitialized) {
945 for (const auto &field : changeset.fAddedFields) {
946 if (field->GetStructure() == ENTupleStructure::kStreamer) {
947 throw ROOT::RException(R__FAIL("a Model cannot be extended with Streamer fields"));
948 }
949 }
950 }
951
952 const auto &descriptor = fDescriptorBuilder.GetDescriptor();
953
954 if (descriptor.GetNLogicalColumns() > descriptor.GetNPhysicalColumns()) {
955 // If we already have alias columns, add an offset to the alias columns so that the new physical columns
956 // of the changeset follow immediately the already existing physical columns
957 auto getNColumns = [](const ROOT::RFieldBase &f) -> std::size_t {
958 const auto &reps = f.GetColumnRepresentatives();
959 if (reps.empty())
960 return 0;
961 return reps.size() * reps[0].size();
962 };
963 std::uint32_t nNewPhysicalColumns = 0;
964 for (auto f : changeset.fAddedFields) {
966 for (const auto &descendant : *f)
968 }
969 fDescriptorBuilder.ShiftAliasColumns(nNewPhysicalColumns);
970 }
971
972 auto addField = [&](ROOT::RFieldBase &f) {
973 auto fieldId = descriptor.GetNFields();
974 fDescriptorBuilder.AddField(RFieldDescriptorBuilder::FromField(f).FieldId(fieldId).MakeDescriptor().Unwrap());
975 fDescriptorBuilder.AddFieldLink(f.GetParent()->GetOnDiskId(), fieldId);
976 f.SetOnDiskId(fieldId);
977 ROOT::Internal::CallConnectPageSinkOnField(f, *this, firstEntry); // issues in turn calls to `AddColumn()`
978 };
980 auto fieldId = descriptor.GetNFields();
981 auto sourceFieldId =
983 fDescriptorBuilder.AddField(RFieldDescriptorBuilder::FromField(f).FieldId(fieldId).MakeDescriptor().Unwrap());
984 fDescriptorBuilder.AddFieldLink(f.GetParent()->GetOnDiskId(), fieldId);
985 fDescriptorBuilder.AddFieldProjection(sourceFieldId, fieldId);
986 f.SetOnDiskId(fieldId);
987 for (const auto &source : descriptor.GetColumnIterable(sourceFieldId)) {
988 auto targetId = descriptor.GetNLogicalColumns();
990 columnBuilder.LogicalColumnId(targetId)
991 .PhysicalColumnId(source.GetLogicalId())
992 .FieldId(fieldId)
993 .BitsOnStorage(source.GetBitsOnStorage())
994 .ValueRange(source.GetValueRange())
995 .Type(source.GetType())
996 .Index(source.GetIndex())
997 .RepresentationIndex(source.GetRepresentationIndex());
998 fDescriptorBuilder.AddColumn(columnBuilder.MakeDescriptor().Unwrap());
999 }
1000 };
1001
1002 R__ASSERT(firstEntry >= fPrevClusterNEntries);
1003 const auto nColumnsBeforeUpdate = descriptor.GetNPhysicalColumns();
1004 for (auto f : changeset.fAddedFields) {
1005 addField(*f);
1006 for (auto &descendant : *f)
1008 }
1009 for (auto f : changeset.fAddedProjectedFields) {
1011 for (auto &descendant : *f)
1013 }
1014
1015 const auto nColumns = descriptor.GetNPhysicalColumns();
1016 fOpenColumnRanges.reserve(fOpenColumnRanges.size() + (nColumns - nColumnsBeforeUpdate));
1017 fOpenPageRanges.reserve(fOpenPageRanges.size() + (nColumns - nColumnsBeforeUpdate));
1020 columnRange.SetPhysicalColumnId(i);
1021 // We set the first element index in the current cluster to the first element that is part of a materialized page
1022 // (i.e., that is part of a page list). For columns created during late model extension, however, the column range
1023 // is fixed up as needed by `RClusterDescriptorBuilder::AddExtendedColumnRanges()` on read back.
1024 columnRange.SetFirstElementIndex(descriptor.GetColumnDescriptor(i).GetFirstElementIndex());
1025 columnRange.SetNElements(0);
1026 columnRange.SetCompressionSettings(GetWriteOptions().GetCompression());
1027 fOpenColumnRanges.emplace_back(columnRange);
1029 pageRange.SetPhysicalColumnId(i);
1030 fOpenPageRanges.emplace_back(std::move(pageRange));
1031 }
1032
1033 // Mapping of memory to on-disk column IDs usually happens during serialization of the ntuple header. If the
1034 // header was already serialized, this has to be done manually as it is required for page list serialization.
1035 if (fSerializationContext.GetHeaderSize() > 0)
1036 fSerializationContext.MapSchema(descriptor, /*forHeaderExtension=*/true);
1037}
1038
1040{
     // Only streamer-info records are accepted; any other content ID is reported as an internal error
     // by throwing an RException.
1041 if (extraTypeInfo.GetContentId() != EExtraTypeInfoIds::kStreamerInfo)
1042 throw RException(R__FAIL("ROOT bug: unexpected type extra info in UpdateExtraTypeInfo()"));
1043
     // Deserialize the record's content and merge the result into fInfosOfStreamerFields.
     // Unwrap() is applied to the deserialization result (presumably throws on failure -- confirm in RError.hxx).
1044 fInfosOfStreamerFields.merge(RNTupleSerializer::DeserializeStreamerInfos(extraTypeInfo.GetContent()).Unwrap());
1045}
1046
1048{
     // Start the descriptor with the ntuple name and the description taken from the model.
1049 fDescriptorBuilder.SetNTuple(fNTupleName, model.GetDescription());
1050 const auto &descriptor = fDescriptorBuilder.GetDescriptor();
1051
     // NOTE(review): the declarations of `fieldZero`, `projectedFields` and `initialChangeset` are not visible in
     // this listing; presumably they are derived from `model` -- confirm against the full source.
     // The zero field is added with field ID 0 and both zero fields (regular and projected) get on-disk ID 0.
1053 fDescriptorBuilder.AddField(RFieldDescriptorBuilder::FromField(fieldZero).FieldId(0).MakeDescriptor().Unwrap());
1054 fieldZero.SetOnDiskId(0);
1056 projectedFields.GetFieldZero().SetOnDiskId(0);
1057
     // Gather the subfields of both zero fields into a single changeset and apply it through UpdateSchema()
     // with first entry 0.
1059 initialChangeset.fAddedFields.reserve(fieldZero.GetMutableSubfields().size());
1060 for (auto f : fieldZero.GetMutableSubfields())
1061 initialChangeset.fAddedFields.emplace_back(f);
1062 initialChangeset.fAddedProjectedFields.reserve(projectedFields.GetFieldZero().GetMutableSubfields().size());
1063 for (auto f : projectedFields.GetFieldZero().GetMutableSubfields())
1064 initialChangeset.fAddedProjectedFields.emplace_back(f);
1065 UpdateSchema(initialChangeset, 0U);
1066
     // Two-pass header serialization: the first call (null buffer) provides the header size used to allocate
     // the buffer, the second call fills that buffer. The serialized header is then handed to the
     // (buffer, length) overload of InitImpl().
1067 fSerializationContext = RNTupleSerializer::SerializeHeader(nullptr, descriptor).Unwrap();
1068 auto buffer = MakeUninitArray<unsigned char>(fSerializationContext.GetHeaderSize());
1069 fSerializationContext = RNTupleSerializer::SerializeHeader(buffer.get(), descriptor).Unwrap();
1070 InitImpl(buffer.get(), fSerializationContext.GetHeaderSize());
1071
1072 fDescriptorBuilder.BeginHeaderExtension();
1073}
1074
1075std::unique_ptr<ROOT::RNTupleModel>
1077{
     // Initializes this sink from an existing descriptor (`srcDescriptor`) and returns a model created from the
     // resulting descriptor. If `copyClusters` is set, the source's cluster descriptors are cloned as well.
     // NOTE(review): the second line of the signature is not visible in this listing.
1078 // Create new descriptor
1079 fDescriptorBuilder.SetSchemaFromExisting(srcDescriptor);
1080 const auto &descriptor = fDescriptorBuilder.GetDescriptor();
1081
1082 // Create column/page ranges
1083 const auto nColumns = descriptor.GetNPhysicalColumns();
     // The sink must not have any open ranges yet.
1084 R__ASSERT(fOpenColumnRanges.empty() && fOpenPageRanges.empty());
1085 fOpenColumnRanges.reserve(nColumns);
1086 fOpenPageRanges.reserve(nColumns);
1087 for (ROOT::DescriptorId_t i = 0; i < nColumns; ++i) {
1088 const auto &column = descriptor.GetColumnDescriptor(i);
     // NOTE(review): the declarations of `columnRange` and `pageRange` are not visible in this listing.
     // Every column starts with zero elements and the compression setting from the write options.
1090 columnRange.SetPhysicalColumnId(i);
1091 columnRange.SetFirstElementIndex(column.GetFirstElementIndex());
1092 columnRange.SetNElements(0);
1093 columnRange.SetCompressionSettings(GetWriteOptions().GetCompression());
1094 fOpenColumnRanges.emplace_back(columnRange);
1096 pageRange.SetPhysicalColumnId(i);
1097 fOpenPageRanges.emplace_back(std::move(pageRange));
1098 }
1099
1100 if (copyClusters) {
1101 // Clone and add all cluster descriptors
1102 auto clusterId = srcDescriptor.FindClusterId(0, 0);
     // NOTE(review): the loop header over `clusterId` is not visible in this listing; the statements below
     // up to FindNextClusterId() are executed per source cluster.
1104 auto &cluster = srcDescriptor.GetClusterDescriptor(clusterId);
1105 auto nEntries = cluster.GetNEntries();
1106 for (unsigned int i = 0; i < fOpenColumnRanges.size(); ++i) {
1107 R__ASSERT(fOpenColumnRanges[i].GetPhysicalColumnId() == i);
1108 if (!cluster.ContainsColumn(i)) // a cluster may not contain a column if that column is deferred
1109 break;
1110 const auto &columnRange = cluster.GetColumnRange(i);
1111 R__ASSERT(columnRange.GetPhysicalColumnId() == i);
1112 // TODO: properly handle suppressed columns (check MarkSuppressedColumnRange())
     // Advance the open range past the elements already stored in the copied cluster.
1113 fOpenColumnRanges[i].IncrementFirstElementIndex(columnRange.GetNElements());
1114 }
1115 fDescriptorBuilder.AddCluster(cluster.Clone());
1116 fPrevClusterNEntries += nEntries;
1117
1118 clusterId = srcDescriptor.FindNextClusterId(clusterId);
1119 }
1120 }
1121
1122 // Create model
     // NOTE(review): the declaration of `modelOpts` is not visible in this listing.
1124 modelOpts.SetReconstructProjections(true);
1125 // We want to emulate unknown types to allow merging RNTuples containing types that we lack dictionaries for.
1126 modelOpts.SetEmulateUnknownTypes(true);
1127 auto model = descriptor.CreateModel(modelOpts);
1128 if (!copyClusters) {
     // NOTE(review): the declaration of `projectedFields` is not visible in this listing. Its zero field
     // receives the same on-disk ID as the model's zero field.
1130 projectedFields.GetFieldZero().SetOnDiskId(model->GetConstFieldZero().GetOnDiskId());
1131 }
1132
1133 // Serialize header and init from it
     // The first SerializeHeader() call (null buffer) provides the size, the second one fills the buffer.
1134 fSerializationContext = RNTupleSerializer::SerializeHeader(nullptr, descriptor).Unwrap();
1135 auto buffer = MakeUninitArray<unsigned char>(fSerializationContext.GetHeaderSize());
1136 fSerializationContext = RNTupleSerializer::SerializeHeader(buffer.get(), descriptor).Unwrap();
1137 InitImpl(buffer.get(), fSerializationContext.GetHeaderSize());
1138
1139 fDescriptorBuilder.BeginHeaderExtension();
1140
1141 // mark this sink as initialized
1142 fIsInitialized = true;
1143
1144 return model;
1145}
1146
1148{
     // Flag the open column range of the given column as suppressed. The flag is read and reset when the
     // cluster is staged.
1149 fOpenColumnRanges.at(columnHandle.fPhysicalId).SetIsSuppressed(true);
1150}
1151
1153{
     // Account for the page's elements in the open column range of the column.
1154 fOpenColumnRanges.at(columnHandle.fPhysicalId).IncrementNElements(page.GetNElements());
1155
     // NOTE(review): the declaration of `pageInfo` is not visible in this listing.
     // The page is written through CommitPageImpl(); the returned locator, the element count and the checksum
     // flag from the write options are recorded in the column's open page range.
1157 pageInfo.SetNElements(page.GetNElements());
1158 pageInfo.SetLocator(CommitPageImpl(columnHandle, page));
1159 pageInfo.SetHasChecksum(GetWriteOptions().GetEnablePageChecksums());
1160 fOpenPageRanges.at(columnHandle.fPhysicalId).GetPageInfos().emplace_back(pageInfo);
1161}
1162
1165{
     // Account for the sealed page's elements in the open column range of the column.
1166 fOpenColumnRanges.at(physicalColumnId).IncrementNElements(sealedPage.GetNElements());
1167
     // NOTE(review): the declaration of `pageInfo` is not visible in this listing.
     // The sealed page is written through CommitSealedPageImpl(). Unlike in CommitPage(), the checksum flag is
     // taken from the sealed page itself rather than from the write options.
1169 pageInfo.SetNElements(sealedPage.GetNElements());
1170 pageInfo.SetLocator(CommitSealedPageImpl(physicalColumnId, sealedPage));
1171 pageInfo.SetHasChecksum(sealedPage.GetHasChecksum());
1172 fOpenPageRanges.at(physicalColumnId).GetPageInfos().emplace_back(pageInfo);
1173}
1174
1175std::vector<ROOT::RNTupleLocator>
1176ROOT::Internal::RPagePersistentSink::CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges,
1177 const std::vector<bool> &mask)
1178{
1179 std::vector<ROOT::RNTupleLocator> locators;
1180 locators.reserve(mask.size());
1181 std::size_t i = 0;
1182 for (auto &range : ranges) {
1183 for (auto sealedPageIt = range.fFirst; sealedPageIt != range.fLast; ++sealedPageIt) {
1184 if (mask[i++])
1185 locators.push_back(CommitSealedPageImpl(range.fPhysicalColumnId, *sealedPageIt));
1186 }
1187 }
1188 locators.shrink_to_fit();
1189 return locators;
1190}
1191
/// Commits several groups of sealed pages at once, optionally writing identical pages only once.
///
/// The first pass decides for every page whether it needs to be written (`mask`) and which entry of the locator
/// vector belongs to it (`locatorIndexes`). A page is treated as a duplicate only if a page with the same checksum
/// was seen before in this call AND its data size and bytes match that first page. The selected pages are then
/// written through CommitSealedPageVImpl(). The second pass registers a page info for every page, duplicates
/// included; a duplicate reuses the locator of the page it matched.
1192void ROOT::Internal::RPagePersistentSink::CommitSealedPageV(std::span<RPageStorage::RSealedPageGroup> ranges)
1193{
1194 /// Used in the `originalPages` map
1195 struct RSealedPageLink {
1196 const RSealedPage *fSealedPage = nullptr; ///< Points to the first occurrence of a page with a specific checksum
1197 std::size_t fLocatorIdx = 0; ///< The index in the locator vector returned by CommitSealedPageVImpl()
1198 };
1199
     // One flag per page in iteration order: true if the page has to be written.
1200 std::vector<bool> mask;
1201 // For every sealed page, stores the corresponding index in the locator vector returned by CommitSealedPageVImpl()
1202 std::vector<std::size_t> locatorIndexes;
1203 // Maps page checksums to the first sealed page with that checksum
1204 std::unordered_map<std::uint64_t, RSealedPageLink> originalPages;
     // Number of pages selected for writing so far, i.e. the locator index of the next written page.
1205 std::size_t iLocator = 0;
1206 for (auto &range : ranges) {
1207 const auto rangeSize = std::distance(range.fFirst, range.fLast);
1208 mask.reserve(mask.size() + rangeSize);
1209 locatorIndexes.reserve(locatorIndexes.size() + rangeSize);
1210
1211 for (auto sealedPageIt = range.fFirst; sealedPageIt != range.fLast; ++sealedPageIt) {
     // Merging is unavailable or disabled: every page is written and gets its own locator.
1212 if (!fFeatures.fCanMergePages || !fOptions->GetEnableSamePageMerging()) {
1213 mask.emplace_back(true);
1214 locatorIndexes.emplace_back(iLocator++);
1215 continue;
1216 }
1217 // Same page merging requires page checksums - this is checked in the write options
1218 R__ASSERT(sealedPageIt->GetHasChecksum());
1219
1220 const auto chk = sealedPageIt->GetChecksum().Unwrap();
1221 auto itr = originalPages.find(chk);
     // First page with this checksum: remember it together with its locator index and write it.
1222 if (itr == originalPages.end()) {
1223 originalPages.insert({chk, {&(*sealedPageIt), iLocator}});
1224 mask.emplace_back(true);
1225 locatorIndexes.emplace_back(iLocator++);
1226 continue;
1227 }
1228
     // Same checksum but different size or content: write the page as usual. The map entry keeps
     // pointing to the first page with this checksum.
1229 const auto *p = itr->second.fSealedPage;
1230 if (sealedPageIt->GetDataSize() != p->GetDataSize() ||
1231 memcmp(sealedPageIt->GetBuffer(), p->GetBuffer(), p->GetDataSize())) {
1232 mask.emplace_back(true);
1233 locatorIndexes.emplace_back(iLocator++);
1234 continue;
1235 }
1236
     // True duplicate: do not write the page; it shares the locator of the page it matched.
1237 mask.emplace_back(false);
1238 locatorIndexes.emplace_back(itr->second.fLocatorIdx);
1239 }
1240
1241 mask.shrink_to_fit();
1242 locatorIndexes.shrink_to_fit();
1243 }
1244
1245 auto locators = CommitSealedPageVImpl(ranges, mask);
     // Running page index over all groups, in the same iteration order as in the first pass.
1246 unsigned i = 0;
1247
1248 for (auto &range : ranges) {
1249 for (auto sealedPageIt = range.fFirst; sealedPageIt != range.fLast; ++sealedPageIt) {
1250 fOpenColumnRanges.at(range.fPhysicalColumnId).IncrementNElements(sealedPageIt->GetNElements());
1251
     // NOTE(review): the declaration of `pageInfo` is not visible in this listing.
1253 pageInfo.SetNElements(sealedPageIt->GetNElements());
1254 pageInfo.SetLocator(locators[locatorIndexes[i++]]);
1255 pageInfo.SetHasChecksum(sealedPageIt->GetHasChecksum());
1256 fOpenPageRanges.at(range.fPhysicalColumnId).GetPageInfos().emplace_back(pageInfo);
1257 }
1258 }
1259}
1260
1263{
     // Moves the per-column state of the currently open cluster into a staged cluster object and resets the
     // open ranges so that they can take the data of the next cluster.
     // NOTE(review): the declaration of `stagedCluster` is not visible in this listing.
1265 stagedCluster.fNBytesWritten = StageClusterImpl();
1266 stagedCluster.fNEntries = nNewEntries;
1267
1268 for (unsigned int i = 0; i < fOpenColumnRanges.size(); ++i) {
1269 RStagedCluster::RColumnInfo columnInfo;
     // value() is called on the compression settings, which therefore must be set for every open column range.
1270 columnInfo.fCompressionSettings = fOpenColumnRanges[i].GetCompressionSettings().value();
1271 if (fOpenColumnRanges[i].IsSuppressed()) {
     // A suppressed column must not have received any pages in this cluster.
1272 assert(fOpenPageRanges[i].GetPageInfos().empty());
1273 columnInfo.fPageRange.SetPhysicalColumnId(i);
1274 columnInfo.fIsSuppressed = true;
1275 // We reset suppressed columns to the state they would have if they were active (not suppressed).
1276 fOpenColumnRanges[i].SetNElements(0);
1277 fOpenColumnRanges[i].SetIsSuppressed(false);
1278 } else {
     // Hand the collected page infos over to the staged cluster. After the swap, the open page range holds
     // the former content of columnInfo.fPageRange, so its column ID is set again.
1279 std::swap(columnInfo.fPageRange, fOpenPageRanges[i]);
1280 fOpenPageRanges[i].SetPhysicalColumnId(i);
1281
1282 columnInfo.fNElements = fOpenColumnRanges[i].GetNElements();
1283 fOpenColumnRanges[i].SetNElements(0);
1284 }
1285 stagedCluster.fColumnInfos.push_back(std::move(columnInfo));
1286 }
1287
1288 return stagedCluster;
1289}
1290
1292{
     // Appends every staged cluster to the descriptor, in the given order. The new cluster ID is the current
     // number of active clusters and the first entry index is the running total fPrevClusterNEntries.
1293 for (const auto &cluster : clusters) {
     // NOTE(review): the declaration of `clusterBuilder` is not visible in this listing.
1295 clusterBuilder.ClusterId(fDescriptorBuilder.GetDescriptor().GetNActiveClusters())
1296 .FirstEntryIndex(fPrevClusterNEntries)
1297 .NEntries(cluster.fNEntries);
1298 for (const auto &columnInfo : cluster.fColumnInfos) {
1299 const auto colId = columnInfo.fPageRange.GetPhysicalColumnId();
1300 if (columnInfo.fIsSuppressed) {
1301 assert(columnInfo.fPageRange.GetPageInfos().empty());
1302 clusterBuilder.MarkSuppressedColumnRange(colId);
1303 } else {
     // Commit the range starting at the column's current first element index, then advance that index
     // by the number of elements of this cluster.
1304 clusterBuilder.CommitColumnRange(colId, fOpenColumnRanges[colId].GetFirstElementIndex(),
1305 columnInfo.fCompressionSettings, columnInfo.fPageRange);
1306 fOpenColumnRanges[colId].IncrementFirstElementIndex(columnInfo.fNElements);
1307 }
1308 }
1309
1310 clusterBuilder.CommitSuppressedColumnRanges(fDescriptorBuilder.GetDescriptor()).ThrowOnError();
1311 for (const auto &columnInfo : cluster.fColumnInfos) {
1312 if (!columnInfo.fIsSuppressed)
1313 continue;
1314 const auto colId = columnInfo.fPageRange.GetPhysicalColumnId();
1315 // For suppressed columns, we need to reset the first element index to the first element of the next (upcoming)
1316 // cluster. This information has been determined for the committed cluster descriptor through
1317 // CommitSuppressedColumnRanges(), so we can use the information from the descriptor.
1318 const auto &columnRangeFromDesc = clusterBuilder.GetColumnRange(colId);
1319 fOpenColumnRanges[colId].SetFirstElementIndex(columnRangeFromDesc.GetFirstElementIndex() +
1320 columnRangeFromDesc.GetNElements());
1321 }
1322
1323 fDescriptorBuilder.AddCluster(clusterBuilder.MoveDescriptor().Unwrap());
1324 fPrevClusterNEntries += cluster.fNEntries;
1325 }
1326}
1327
1329{
     // Writes the page list for the clusters [fNextClusterInGroup, nClusters) and registers them as a new
     // cluster group in the descriptor.
1330 const auto &descriptor = fDescriptorBuilder.GetDescriptor();
1331
1332 const auto nClusters = descriptor.GetNActiveClusters();
1333 std::vector<ROOT::DescriptorId_t> physClusterIDs;
1334 physClusterIDs.reserve(nClusters);
1335 for (auto i = fNextClusterInGroup; i < nClusters; ++i) {
1336 physClusterIDs.emplace_back(fSerializationContext.MapClusterId(i));
1337 }
1338
     // Serializing with a null buffer provides the size of the page list.
     // NOTE(review): the lines that allocate and fill `bufPageList` are not visible in this listing.
1339 auto szPageList =
1340 RNTupleSerializer::SerializePageList(nullptr, descriptor, physClusterIDs, fSerializationContext).Unwrap();
1343
1344 const auto clusterGroupId = descriptor.GetNClusterGroups();
1345 const auto locator = CommitClusterGroupImpl(bufPageList.get(), szPageList);
     // NOTE(review): the declaration of `cgBuilder` is not visible in this listing.
1347 cgBuilder.ClusterGroupId(clusterGroupId).PageListLocator(locator).PageListLength(szPageList);
     // No cluster was committed since the last cluster group: record an empty group.
1348 if (fNextClusterInGroup == nClusters) {
1349 cgBuilder.MinEntry(0).EntrySpan(0).NClusters(0);
1350 } else {
     // The entry span runs from the first entry of the first cluster to the end of the last cluster.
1351 const auto &firstClusterDesc = descriptor.GetClusterDescriptor(fNextClusterInGroup);
1352 const auto &lastClusterDesc = descriptor.GetClusterDescriptor(nClusters - 1);
1353 cgBuilder.MinEntry(firstClusterDesc.GetFirstEntryIndex())
1354 .EntrySpan(lastClusterDesc.GetFirstEntryIndex() + lastClusterDesc.GetNEntries() -
1355 firstClusterDesc.GetFirstEntryIndex())
1356 .NClusters(nClusters - fNextClusterInGroup);
1357 }
1358 std::vector<ROOT::DescriptorId_t> clusterIds;
1359 clusterIds.reserve(nClusters);
1360 for (auto i = fNextClusterInGroup; i < nClusters; ++i) {
1361 clusterIds.emplace_back(i);
1362 }
1363 cgBuilder.AddSortedClusters(clusterIds);
1364 fDescriptorBuilder.AddClusterGroup(cgBuilder.MoveDescriptor().Unwrap());
1365 fSerializationContext.MapClusterGroupId(clusterGroupId);
1366
     // The next cluster group starts with the first cluster that is committed from now on.
1367 fNextClusterInGroup = nClusters;
1368}
1369
1372{
     // Builds an attribute set descriptor from the set name and the anchor's length and locator, and adds it
     // to the descriptor. An error reported by AddAttributeSet() is turned into an exception by ThrowOnError().
     // NOTE(review): the declaration of `attrSetDescBuilder` and the origin of kSchemaVersionMajor/Minor are
     // not visible in this listing.
1374
1376 auto attrSetDesc = attrSetDescBuilder.SchemaVersion(kSchemaVersionMajor, kSchemaVersionMinor)
1377 .AnchorLength(attrAnchorInfo.fLength)
1378 .AnchorLocator(attrAnchorInfo.fLocator)
1379 .Name(attrSetName)
1380 .MoveDescriptor()
1381 .Unwrap();
1382 fDescriptorBuilder.AddAttributeSet(std::move(attrSetDesc)).ThrowOnError();
1383}
1384
1386{
     // Finalizes the data set: stores the collected streamer infos as extra type info (if there are any),
     // serializes the footer and passes it to CommitDatasetImpl(buffer, size).
1387 if (!fInfosOfStreamerFields.empty()) {
1388 // De-duplicate extra type infos before writing. Usually we won't have them already in the descriptor, but
1389 // this may happen when we are writing back an already-existing RNTuple, e.g. when doing incremental merging.
1390 for (const auto &etDesc : fDescriptorBuilder.GetDescriptor().GetExtraTypeInfoIterable()) {
1391 if (etDesc.GetContentId() == EExtraTypeInfoIds::kStreamerInfo) {
1392 // The specification mandates that the type name for a kStreamerInfo should be empty and the type version
1393 // should be zero.
1394 R__ASSERT(etDesc.GetTypeName().empty());
1395 R__ASSERT(etDesc.GetTypeVersion() == 0);
1396 auto etInfo = RNTupleSerializer::DeserializeStreamerInfos(etDesc.GetContent()).Unwrap();
1397 fInfosOfStreamerFields.merge(etInfo);
1398 }
1399 }
1400
     // NOTE(review): the declaration of `extraInfoBuilder` and the beginning of this builder call chain are
     // not visible in this listing.
1403 .Content(RNTupleSerializer::SerializeStreamerInfos(fInfosOfStreamerFields));
1404 fDescriptorBuilder.ReplaceExtraTypeInfo(extraInfoBuilder.MoveDescriptor().Unwrap());
1405 }
1406
1407 const auto &descriptor = fDescriptorBuilder.GetDescriptor();
1408
     // Serializing with a null buffer provides the footer size; the second call writes into `bufFooter`.
     // NOTE(review): the allocation of `bufFooter` is not visible in this listing. The result of the second
     // SerializeFooter() call is not inspected.
1409 auto szFooter = RNTupleSerializer::SerializeFooter(nullptr, descriptor, fSerializationContext).Unwrap();
1411 RNTupleSerializer::SerializeFooter(bufFooter.get(), descriptor, fSerializationContext);
1412
1413 return CommitDatasetImpl(bufFooter.get(), szFooter);
1414}
1415
1417{
     // Replaces fMetrics with a new RNTupleMetrics object constructed from `prefix` and registers the
     // write-side counters in it. Each MakeCounter() call receives a name, a unit and a description; its result
     // is dereferenced and used to initialize the RCounters aggregate in the order listed below.
1418 fMetrics = RNTupleMetrics(prefix);
1419 fCounters = std::make_unique<RCounters>(RCounters{
1420 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("nPageCommitted", "", "number of pages committed to storage"),
1421 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("szWritePayload", "B", "volume written for committed pages"),
1422 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("szZip", "B", "volume before zipping"),
1423 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("timeWallWrite", "ns", "wall clock time spent writing"),
1424 *fMetrics.MakeCounter<RNTupleAtomicCounter *>("timeWallZip", "ns", "wall clock time spent compressing"),
1425 *fMetrics.MakeCounter<RNTupleTickCounter<RNTupleAtomicCounter> *>("timeCpuWrite", "ns", "CPU time spent writing"),
1426 *fMetrics.MakeCounter<RNTupleTickCounter<RNTupleAtomicCounter> *>("timeCpuZip", "ns",
1427 "CPU time spent compressing")});
1428}
fBuffer
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:303
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define f(i)
Definition RSha256.hxx:104
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char mode
char name[80]
Definition TGX11.cxx:148
#define _(A, B)
Definition cfortran.h:108
A thread-safe integral performance counter.
A metric element that computes its floating point value from other counters.
A collection of Counter objects with a name, a unit, and a description.
A helper class for piece-wise construction of an RClusterDescriptor.
A helper class for piece-wise construction of an RClusterGroupDescriptor.
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:147
std::unordered_set< ROOT::DescriptorId_t > ColumnSet_t
Definition RCluster.hxx:149
A helper class for piece-wise construction of an RColumnDescriptor.
A column element encapsulates the translation between basic C++ types and their column representation...
virtual RIdentifier GetIdentifier() const =0
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
Definition RColumn.hxx:37
std::optional< std::pair< double, double > > GetValueRange() const
Definition RColumn.hxx:345
std::uint16_t GetRepresentationIndex() const
Definition RColumn.hxx:351
ROOT::Internal::RColumnElementBase * GetElement() const
Definition RColumn.hxx:338
ROOT::ENTupleColumnType GetType() const
Definition RColumn.hxx:339
ROOT::NTupleSize_t GetFirstElementIndex() const
Definition RColumn.hxx:353
std::size_t GetWritePageCapacity() const
Definition RColumn.hxx:360
std::uint16_t GetBitsOnStorage() const
Definition RColumn.hxx:340
std::uint32_t GetIndex() const
Definition RColumn.hxx:350
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
A helper class for piece-wise construction of an RFieldDescriptor.
static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live RNTuple field.
static std::size_t Zip(const void *from, std::size_t nbytes, int compression, void *to)
Returns the size of the compressed data, written into the provided output buffer.
static void Unzip(const void *from, size_t nbytes, size_t dataLen, void *to)
The nbytes parameter provides the size of the from buffer.
static unsigned int GetClusterBunchSize(const RNTupleReadOptions &options)
A helper class for serializing and deserialization of the RNTuple binary format.
static std::uint32_t SerializeXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3, void *buffer)
Writes a XxHash-3 64bit checksum of the byte range given by data and length.
static RResult< StreamerInfoMap_t > DeserializeStreamerInfos(const std::string &extraTypeInfoContent)
static RResult< void > VerifyXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3)
Expects an xxhash3 checksum in the 8 bytes following data + length and verifies it.
static RResult< std::uint32_t > SerializePageList(void *buffer, const RNTupleDescriptor &desc, std::span< ROOT::DescriptorId_t > physClusterIDs, const RContext &context)
static RResult< std::uint32_t > SerializeFooter(void *buffer, const RNTupleDescriptor &desc, const RContext &context)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static RResult< RContext > SerializeHeader(void *buffer, const RNTupleDescriptor &desc)
static std::string SerializeStreamerInfos(const StreamerInfoMap_t &infos)
A memory region that contains packed and compressed pages.
Definition RCluster.hxx:98
A page as being stored on disk, that is packed and compressed.
Definition RCluster.hxx:40
Uses standard C++ memory allocation for the column data pages.
Abstract interface to allocate and release pages.
RStagedCluster StageCluster(ROOT::NTupleSize_t nNewEntries) final
Stage the current cluster and create a new one for the following data.
void UpdateSchema(const ROOT::Internal::RNTupleModelChangeset &changeset, ROOT::NTupleSize_t firstEntry) override
Incorporate incremental changes to the model into the ntuple descriptor.
void CommitSealedPage(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
Write a preprocessed page to storage. The column must have been added before.
std::unique_ptr< RNTupleModel > InitFromDescriptor(const ROOT::RNTupleDescriptor &descriptor, bool copyClusters)
Initialize sink based on an existing descriptor and fill into the descriptor builder,...
void UpdateExtraTypeInfo(const ROOT::RExtraTypeInfoDescriptor &extraTypeInfo) final
Adds an extra type information record to schema.
void CommitAttributeSet(std::string_view attrSetName, const RNTupleLink &attrAnchorInfo) final
Adds the given anchor information (name + locator) into the main RNTuple's descriptor as an attribute...
ColumnHandle_t AddColumn(ROOT::DescriptorId_t fieldId, ROOT::Internal::RColumn &column) final
Register a new column.
virtual std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges, const std::vector< bool > &mask)
Vector commit of preprocessed pages.
RPagePersistentSink(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
void CommitSuppressedColumn(ColumnHandle_t columnHandle) final
Commits a suppressed column for the current cluster.
void CommitStagedClusters(std::span< RStagedCluster > clusters) final
Commit staged clusters, logically appending them to the ntuple descriptor.
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Guess the concrete derived page sink from the location.
void CommitPage(ColumnHandle_t columnHandle, const ROOT::Internal::RPage &page) final
Write a page to the storage. The column must have been added before.
virtual void InitImpl(unsigned char *serializedHeader, std::uint32_t length)=0
void CommitClusterGroup() final
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges) final
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
Reference to a page stored in the page pool.
Abstract interface to write data into an ntuple.
RNTupleLink CommitDataset()
Run the registered callbacks and finalize the current cluster and the entire data set.
virtual ROOT::Internal::RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)
Get a new, empty page for the given column that can be filled with up to nElements; nElements must be...
RSealedPage SealPage(const ROOT::Internal::RPage &page, const ROOT::Internal::RColumnElementBase &element)
Helper for streaming a page.
RPageSink(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
void Insert(ROOT::DescriptorId_t physicalColumnId, ROOT::Internal::RColumnElementBase::RIdentifier elementId)
ROOT::Internal::RCluster::ColumnSet_t ToColumnSet() const
void Erase(ROOT::DescriptorId_t physicalColumnId, ROOT::Internal::RColumnElementBase::RIdentifier elementId)
void LoadStructure()
Loads header and footer without decompressing or deserializing them.
virtual ROOT::Internal::RPageRef LoadPage(ColumnHandle_t columnHandle, ROOT::NTupleSize_t globalIndex)
Allocates and fills a page that contains the index-th element.
void RegisterStreamerInfos()
Builds the streamer info records from the descriptor's extra type info section.
void Attach(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode=ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode::kForReading)
Open the physical storage container and deserialize header and footer.
ColumnHandle_t AddColumn(ROOT::DescriptorId_t fieldId, ROOT::Internal::RColumn &column) override
Register a new column.
void UnzipCluster(ROOT::Internal::RCluster *cluster)
Parallel decompression and unpacking of the pages in the given cluster.
void PrepareLoadCluster(const ROOT::Internal::RCluster::RKey &clusterKey, ROOT::Internal::ROnDiskPageMap &pageZeroMap, std::function< void(ROOT::DescriptorId_t, ROOT::NTupleSize_t, const ROOT::RClusterDescriptor::RPageInfo &)> perPageFunc)
Prepare a page range read for the column set in clusterKey.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
ROOT::NTupleSize_t GetNEntries()
ROOT::Internal::RPageRef LoadZeroPage(ColumnHandle_t columnHandle, const RPageSummary &pageSummary)
void UpdateLastUsedCluster(ROOT::DescriptorId_t clusterId)
Does nothing if fLastUsedCluster == clusterId.
ROOT::NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
ROOT::Internal::RPageRef LoadPageFromSummary(ColumnHandle_t columnHandle, const RPageSummary &pageSummary)
void DropColumn(ColumnHandle_t columnHandle) override
Unregisters a column.
void LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage)
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
virtual void UnzipClusterImpl(ROOT::Internal::RCluster *cluster)
RPageSource(std::string_view ntupleName, const ROOT::RNTupleReadOptions &fOptions)
void SetEntryRange(const REntryRange &range)
Promise to only read from the given entry range.
std::unique_ptr< RPageSource > Clone() const
Open the same storage multiple times, e.g.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
static RResult< ROOT::Internal::RPage > UnsealPage(const RSealedPage &sealedPage, const ROOT::Internal::RColumnElementBase &element, ROOT::Internal::RPageAllocator &pageAlloc)
Helper for unstreaming a page.
Common functionality of an ntuple storage for both reading and writing.
RPageStorage(std::string_view name)
Stores information about the cluster in which this page resides.
Definition RPage.hxx:52
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:43
static const void * GetPageZeroBuffer()
Return a pointer to the page zero buffer used if there is no on-disk data for a particular deferred c...
Definition RPage.cxx:22
const ROOT::RFieldBase * GetSourceField(const ROOT::RFieldBase *target) const
bool TryEvict(std::size_t targetAvailableSize, std::size_t pageSizeLimit)
Flush columns in order of allocated write page size until the sum of all write page allocations leave...
bool TryUpdate(ROOT::Internal::RColumn &column, std::size_t newWritePageSize)
Try to register the new write page size for the given column.
The window of element indexes of a particular column in a particular cluster.
Records the partition of data into pages for a particular column in a particular cluster.
Metadata for RNTuple clusters.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Field-specific extra type information from the header / extension header.
A field translates read and write calls from/to underlying columns to/from tree values.
The on-storage metadata of an RNTuple.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
The RNTupleModel encapsulates the schema of an RNTuple.
const std::string & GetDescription() const
Common user-tunable settings for reading RNTuples.
Common user-tunable settings for storing RNTuples.
const_iterator begin() const
const_iterator end() const
void ThrowOnError()
Short-hand method to throw an exception in the case of errors.
Definition RError.hxx:289
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
ROOT::RFieldZero & GetFieldZeroOfModel(RNTupleModel &model)
RResult< void > EnsureValidNameForRNTuple(std::string_view name, std::string_view where)
Check whether a given string is a valid name according to the RNTuple specification.
std::unique_ptr< T[]> MakeUninitArray(std::size_t size)
Make an array of default-initialized elements.
RProjectedFields & GetProjectedFieldsOfModel(RNTupleModel &model)
std::unique_ptr< RColumnElementBase > GenerateColumnElement(std::type_index inMemoryType, ROOT::ENTupleColumnType onDiskType)
void CallConnectPageSinkOnField(RFieldBase &, ROOT::Internal::RPageSink &, ROOT::NTupleSize_t firstEntry=0)
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:151
Every concrete RColumnElement type is identified by its on-disk type (column type) and the in-memory ...
The incremental changes to a RNTupleModel
On-disk pages within a page source are identified by the column and page number.
Definition RCluster.hxx:50
Default I/O performance counters that get registered in fMetrics.
Parameters for the SealPage() method.
bool fWriteChecksum
Adds a 8 byte little-endian xxhash3 checksum to the page payload.
std::uint32_t fCompressionSettings
Compression algorithm and level to apply.
void * fBuffer
Location for sealed output. The memory buffer has to be large enough.
const ROOT::Internal::RPage * fPage
Input page to be sealed.
bool fAllowAlias
If false, the output buffer must not point to the input page buffer, which would otherwise be an opti...
const ROOT::Internal::RColumnElementBase * fElement
Corresponds to the page's elements, for size calculation etc.
Cluster that was staged, but not yet logically appended to the RNTuple.
Default I/O performance counters that get registered in fMetrics
Used in SetEntryRange / GetEntryRange.
bool IntersectsWith(const ROOT::RClusterDescriptor &clusterDesc) const
Returns true if the given cluster has entries within the entry range.
Summarizes meta-data necessary to load a certain page. Used by LoadPageFromSummary().
A sealed page contains the bytes of a page as written to storage (packed & compressed).
RResult< void > VerifyChecksumIfEnabled() const
RResult< std::uint64_t > GetChecksum() const
Returns a failure if the sealed page has no checksum.
bool operator>(const RColumnInfo &other) const
Information about a single page in the context of a cluster's page range.