Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageStorageDaos.cxx
Go to the documentation of this file.
1/// \file RPageStorageDaos.cxx
2/// \ingroup NTuple
3/// \author Javier Lopez-Gomez <j.lopez@cern.ch>
4/// \date 2020-11-03
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RCluster.hxx>
17#include <ROOT/RClusterPool.hxx>
18#include <ROOT/RLogger.hxx>
20#include <ROOT/RNTupleModel.hxx>
23#include <ROOT/RNTupleTypes.hxx>
24#include <ROOT/RNTupleUtils.hxx>
25#include <ROOT/RNTupleZip.hxx>
26#include <ROOT/RPage.hxx>
28#include <ROOT/RPagePool.hxx>
29#include <ROOT/RDaos.hxx>
31
32#include <RVersion.h>
33#include <TError.h>
34
35#include <algorithm>
36#include <cstdio>
37#include <cstdlib>
38#include <cstring>
39#include <limits>
40#include <utility>
41#include <regex>
42#include <cassert>
43
44namespace {
53
54/// \brief RNTuple page-DAOS mappings
/// `kOidPerCluster`: one object ID per cluster; `GetPageDaosKey` then uses the column ID as distribution key and the
/// page counter as attribute key.
/// `kOidPerPage`: one object ID per page (the page counter becomes the low part of the OID); per the key constants'
/// documentation in this file, the pre-defined default distribution/attribute keys are used in that case.
55enum EDaosMapping { kOidPerCluster, kOidPerPage };
56
/// \brief Coordinates of one value in the object store: object ID, distribution key and attribute key.
/// Produced by `GetPageDaosKey` and passed field by field to `RDaosContainer::ReadSingleAkey()` /
/// `WriteSingleAkey()` in this file.
57struct RDaosKey {
/// Object ID; `GetPageDaosKey` puts the ntuple index in `hi` and the cluster ID or page counter in `lo`.
58 daos_obj_id_t fOid;
/// Distribution key (the column ID under `kOidPerCluster`).
59 DistributionKey_t fDkey;
/// Attribute key (the page counter under `kOidPerCluster`).
60 AttributeKey_t fAkey;
61};
62
63/// \brief Pre-defined keys for object store. `kDistributionKeyDefault` is the distribution key for metadata and
64/// pagelist values; optionally it can be used for ntuple pages (if under the `kOidPerPage` mapping strategy).
65/// `kAttributeKeyDefault` is the attribute key for ntuple pages under `kOidPerPage`.
66/// `kAttributeKey{Anchor,Header,Footer}` are the respective attribute keys for anchor/header/footer metadata elements.
67static constexpr DistributionKey_t kDistributionKeyDefault = 0x5a3c69f0cafe4a11;
68static constexpr AttributeKey_t kAttributeKeyDefault = 0x4243544b53444229;
69static constexpr AttributeKey_t kAttributeKeyAnchor = 0x4243544b5344422a;
70static constexpr AttributeKey_t kAttributeKeyHeader = 0x4243544b5344422b;
71static constexpr AttributeKey_t kAttributeKeyFooter = 0x4243544b5344422c;
72
73/// \brief Pre-defined 64 LSb of the OIDs for ntuple metadata (holds anchor/header/footer) and clusters' pagelists.
74static constexpr decltype(daos_obj_id_t::lo) kOidLowMetadata = -1;
75static constexpr decltype(daos_obj_id_t::lo) kOidLowPageList = -2;
76
77static constexpr daos_oclass_id_t kCidMetadata = OC_SX;
78
79static constexpr EDaosMapping kDefaultDaosMapping = kOidPerCluster;
80
81template <EDaosMapping mapping>
83 long unsigned columnId, long unsigned pageCount)
84{
85 if constexpr (mapping == kOidPerCluster) {
86 return RDaosKey{daos_obj_id_t{static_cast<decltype(daos_obj_id_t::lo)>(clusterId),
87 static_cast<decltype(daos_obj_id_t::hi)>(ntplId)},
88 static_cast<DistributionKey_t>(columnId), static_cast<AttributeKey_t>(pageCount)};
89 } else if constexpr (mapping == kOidPerPage) {
90 return RDaosKey{daos_obj_id_t{static_cast<decltype(daos_obj_id_t::lo)>(pageCount),
91 static_cast<decltype(daos_obj_id_t::hi)>(ntplId)},
93 }
94}
95
/// \brief The two components extracted from a DAOS RNTuple URI by `ParseDaosURI`.
struct RDaosURI {
   /// \brief Pool label: the first path component of the URI
   std::string fPoolLabel;
   /// \brief Container label: the remainder of the URI; identifies the container holding this RNTuple
   std::string fContainerLabel;
};
102
103/**
104 \brief Parse a DAOS RNTuple URI of the form 'daos://pool_id/container_id'.
105*/
106RDaosURI ParseDaosURI(std::string_view uri)
107{
108 std::regex re("daos://([^/]+)/(.+)");
109 std::cmatch m;
110 if (!std::regex_match(uri.data(), m, re))
111 throw ROOT::RException(R__FAIL("Invalid DAOS pool URI."));
112 return {m[1], m[2]};
113}
114
115/// \brief Unpacks a 64-bit RNTuple page locator address for object stores into a pair of 32-bit values:
116/// the attribute key under which the cage is stored and the offset within that cage to access the page.
117std::pair<uint32_t, uint32_t> DecodeDaosPagePosition(const ROOT::RNTupleLocatorObject64 &address)
118{
119 auto position = static_cast<uint32_t>(address.GetLocation() & 0xFFFFFFFF);
120 auto offset = static_cast<uint32_t>(address.GetLocation() >> 32);
121 return {position, offset};
122}
123
124/// \brief Packs an attribute key together with an offset within its contents into a single 64-bit address.
125/// The offset is kept in the MSb half and defaults to zero, which is the case when caging is disabled.
126ROOT::RNTupleLocatorObject64 EncodeDaosPagePosition(uint64_t position, uint64_t offset = 0)
127{
128 uint64_t address = (position & 0xFFFFFFFF) | (offset << 32);
129 return ROOT::RNTupleLocatorObject64{address};
130}
131
132/// \brief Helper structure concentrating the functionality required to locate an ntuple within a DAOS container.
133/// It includes a hashing function that converts the RNTuple's name into a 32-bit identifier; this value is used to
134/// index the subspace for the ntuple among all objects in the container. A zero-value hash value is reserved for
135/// storing any future metadata related to container-wide management; a zero-index ntuple is thus disallowed and
136/// remapped to "1". Once the index is computed, `InitNTupleDescriptorBuilder()` can be called to return a
137/// partially-filled builder with the ntuple's anchor, header and footer, lacking only pagelists. Upon that call,
138/// a copy of the anchor is stored in `fAnchor`.
139struct RDaosContainerNTupleLocator {
140 std::string fName{};
141 ntuple_index_t fIndex{};
142 std::optional<ROOT::Experimental::Internal::RDaosNTupleAnchor> fAnchor;
143 static const ntuple_index_t kReservedIndex = 0;
144
145 RDaosContainerNTupleLocator() = default;
146 explicit RDaosContainerNTupleLocator(const std::string &ntupleName) : fName(ntupleName), fIndex(Hash(ntupleName)){};
147
148 bool IsValid() { return fAnchor.has_value() && fAnchor->fNBytesHeader; }
149 [[nodiscard]] ntuple_index_t GetIndex() const { return fIndex; };
150 static ntuple_index_t Hash(const std::string &ntupleName)
151 {
152 // Convert string to numeric representation via `std::hash`.
153 uint64_t h = std::hash<std::string>{}(ntupleName);
154 // Fold the hash into 32-bit using `boost::hash_combine()` algorithm and magic number.
155 auto seed = static_cast<uint32_t>(h >> 32);
156 seed ^= static_cast<uint32_t>(h & 0xffffffff) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
157 auto hash = static_cast<ntuple_index_t>(seed);
158 return (hash == kReservedIndex) ? kReservedIndex + 1 : hash;
159 }
160
163 {
164 std::unique_ptr<unsigned char[]> buffer, zipBuffer;
165 auto &anchor = fAnchor.emplace();
166 int err;
167
169 daos_obj_id_t oidMetadata{kOidLowMetadata, static_cast<decltype(daos_obj_id_t::hi)>(this->GetIndex())};
170
172 if ((err = cont.ReadSingleAkey(buffer.get(), anchorSize, oidMetadata, kDistributionKeyDefault,
174 return err;
175 }
176
177 anchor.Deserialize(buffer.get(), anchorSize).Unwrap();
178
179 builder.SetVersion(anchor.fVersionEpoch, anchor.fVersionMajor, anchor.fVersionMinor, anchor.fVersionPatch);
180 builder.SetOnDiskHeaderSize(anchor.fNBytesHeader);
181 buffer = MakeUninitArray<unsigned char>(anchor.fLenHeader);
183 if ((err = cont.ReadSingleAkey(zipBuffer.get(), anchor.fNBytesHeader, oidMetadata, kDistributionKeyDefault,
185 return err;
186 RNTupleDecompressor::Unzip(zipBuffer.get(), anchor.fNBytesHeader, anchor.fLenHeader, buffer.get());
187 RNTupleSerializer::DeserializeHeader(buffer.get(), anchor.fLenHeader, builder);
188
189 builder.AddToOnDiskFooterSize(anchor.fNBytesFooter);
190 buffer = MakeUninitArray<unsigned char>(anchor.fLenFooter);
192 if ((err = cont.ReadSingleAkey(zipBuffer.get(), anchor.fNBytesFooter, oidMetadata, kDistributionKeyDefault,
194 return err;
195 RNTupleDecompressor::Unzip(zipBuffer.get(), anchor.fNBytesFooter, anchor.fLenFooter, buffer.get());
196 RNTupleSerializer::DeserializeFooter(buffer.get(), anchor.fLenFooter, builder);
197
198 return 0;
199 }
200
201 static std::pair<RDaosContainerNTupleLocator, ROOT::Internal::RNTupleDescriptorBuilder>
203 {
204 auto result = std::make_pair(RDaosContainerNTupleLocator(ntupleName), ROOT::Internal::RNTupleDescriptorBuilder());
205
206 auto &loc = result.first;
207 auto &builder = result.second;
208
209 if (int err = loc.InitNTupleDescriptorBuilder(cont, builder); !err) {
210 if (ntupleName.empty() || ntupleName != builder.GetDescriptor().GetName()) {
211 // Hash already taken by a differently-named ntuple.
212 throw ROOT::RException(
213 R__FAIL("LocateNTuple: ntuple name '" + ntupleName + "' unavailable in this container."));
214 }
215 }
216 return result;
217 }
218};
219
220} // anonymous namespace
221
222////////////////////////////////////////////////////////////////////////////////
223
241
244{
245 if (bufSize < 32)
246 return R__FAIL("DAOS anchor too short");
247
248 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
250 if (fVersionAnchor != RDaosNTupleAnchor().fVersionAnchor) {
251 return R__FAIL("unsupported DAOS anchor version: " + std::to_string(fVersionAnchor));
252 }
253
263 if (!result)
264 return R__FORWARD_ERROR(result);
265 return result.Unwrap() + 32;
266}
267
272
273////////////////////////////////////////////////////////////////////////////////
274
276 const ROOT::RNTupleWriteOptions &options)
277 : RPagePersistentSink(ntupleName, options), fURI(uri)
278{
279 static std::once_flag once;
280 std::call_once(once, []() {
281 R__LOG_WARNING(ROOT::Internal::NTupleLog()) << "The DAOS backend is experimental and still under development. "
282 << "Do not store real data with this version of RNTuple!";
283 });
284 EnableDefaultMetrics("RPageSinkDaos");
285}
286
288
290{
291 auto opts = dynamic_cast<RNTupleWriteOptionsDaos *>(fOptions.get());
292 fNTupleAnchor.fObjClass = opts ? opts->GetObjectClass() : RNTupleWriteOptionsDaos().GetObjectClass();
293 auto oclass = RDaosObject::ObjClassId(fNTupleAnchor.fObjClass);
294 if (oclass.IsUnknown())
295 throw ROOT::RException(R__FAIL("Unknown object class " + fNTupleAnchor.fObjClass));
296
297 size_t cageSz = opts ? opts->GetMaxCageSize() : RNTupleWriteOptionsDaos().GetMaxCageSize();
298 size_t pageSz = opts ? opts->GetMaxUnzippedPageSize() : RNTupleWriteOptionsDaos().GetMaxUnzippedPageSize();
299 fCageSizeLimit = std::max(cageSz, pageSz);
300
301 auto args = ParseDaosURI(fURI);
302 auto pool = std::make_shared<RDaosPool>(args.fPoolLabel);
303
304 fDaosContainer = std::make_unique<RDaosContainer>(pool, args.fContainerLabel, /*create =*/true);
305 fDaosContainer->SetDefaultObjectClass(oclass);
306
307 auto [locator, _] = RDaosContainerNTupleLocator::LocateNTuple(*fDaosContainer, fNTupleName);
308 fNTupleIndex = locator.GetIndex();
309
311 auto szZipHeader =
312 RNTupleCompressor::Zip(serializedHeader, length, GetWriteOptions().GetCompression(), zipBuffer.get());
313 WriteNTupleHeader(zipBuffer.get(), szZipHeader, length);
314}
315
318{
319 auto element = columnHandle.fColumn->GetElement();
321 {
322 Detail::RNTupleAtomicTimer timer(fCounters->fTimeWallZip, fCounters->fTimeCpuZip);
323 sealedPage = SealPage(page, *element);
324 }
325
326 fCounters->fSzZip.Add(page.GetNBytes());
327 return CommitSealedPageImpl(columnHandle.fPhysicalId, sealedPage);
328}
329
333{
334 auto offsetData = fPageId.fetch_add(1);
335 ROOT::DescriptorId_t clusterId = fDescriptorBuilder.GetDescriptor().GetNActiveClusters();
336
337 {
338 Detail::RNTupleAtomicTimer timer(fCounters->fTimeWallWrite, fCounters->fTimeCpuWrite);
340 fDaosContainer->WriteSingleAkey(sealedPage.GetBuffer(), sealedPage.GetBufferSize(), daosKey.fOid, daosKey.fDkey,
341 daosKey.fAkey);
342 }
343
346 result.SetNBytesOnStorage(sealedPage.GetDataSize());
348 fCounters->fNPageCommitted.Inc();
349 fCounters->fSzWritePayload.Add(sealedPage.GetBufferSize());
350 fNBytesCurrentCluster += sealedPage.GetBufferSize();
351 return result;
352}
353
354std::vector<ROOT::RNTupleLocator>
355ROOT::Experimental::Internal::RPageSinkDaos::CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges,
356 const std::vector<bool> &mask)
357{
359 std::vector<RNTupleLocator> locators;
360 auto nPages = mask.size();
361 locators.reserve(nPages);
362
363 const uint32_t maxCageSz = fCageSizeLimit;
364 const bool useCaging = fCageSizeLimit > 0;
365 const std::uint8_t locatorFlags = useCaging ? EDaosLocatorFlags::kCagedPage : 0;
366
367 ROOT::DescriptorId_t clusterId = fDescriptorBuilder.GetDescriptor().GetNActiveClusters();
368 int64_t payloadSz = 0;
369 std::size_t positionOffset;
370 uint32_t positionIndex;
371
372 /// Aggregate batch of requests by object ID and distribution key, determined by the ntuple-DAOS mapping
373 for (auto &range : ranges) {
374 positionOffset = 0;
375 /// Under caging, the atomic page counter is fetch-incremented for every column range to get the position of its
376 /// first cage and indicate the next one, also ensuring subsequent pages of different columns do not end up caged
377 /// together. This increment is not necessary in the absence of caging, as each page is trivially caged.
378 positionIndex = useCaging ? fPageId.fetch_add(1) : fPageId.load();
379
380 for (auto sealedPageIt = range.fFirst; sealedPageIt != range.fLast; ++sealedPageIt) {
382
384 positionOffset = 0;
385 positionIndex = fPageId.fetch_add(1);
386 }
387
389 d_iov_set(&pageIov, const_cast<void *>(s.GetBuffer()), s.GetBufferSize());
390
391 RDaosKey daosKey =
392 GetPageDaosKey<kDefaultDaosMapping>(fNTupleIndex, clusterId, range.fPhysicalColumnId, positionIndex);
395 it->second.Insert(daosKey.fAkey, pageIov);
396
399 locator.SetNBytesOnStorage(s.GetDataSize());
401 locator.SetReserved(locatorFlags);
402 locators.push_back(locator);
403
406 }
407 }
408 fNBytesCurrentCluster += payloadSz;
409
410 {
411 Detail::RNTupleAtomicTimer timer(fCounters->fTimeWallWrite, fCounters->fTimeCpuWrite);
412 if (int err = fDaosContainer->WriteV(writeRequests))
413 throw ROOT::RException(R__FAIL("WriteV: error" + std::string(d_errstr(err))));
414 }
415
416 fCounters->fNPageCommitted.Add(nPages);
417 fCounters->fSzWritePayload.Add(payloadSz);
418
419 return locators;
420}
421
423{
424 return std::exchange(fNBytesCurrentCluster, 0);
425}
426
429 std::uint32_t length)
430{
432 auto szPageListZip =
433 RNTupleCompressor::Zip(serializedPageList, length, GetWriteOptions().GetCompression(), bufPageListZip.get());
434
435 auto offsetData = fClusterGroupId.fetch_add(1);
436 fDaosContainer->WriteSingleAkey(
438 daos_obj_id_t{kOidLowPageList, static_cast<decltype(daos_obj_id_t::hi)>(fNTupleIndex)}, kDistributionKeyDefault,
442 result.SetNBytesOnStorage(szPageListZip);
444 fCounters->fSzWritePayload.Add(static_cast<int64_t>(szPageListZip));
445 return result;
446}
447
449 std::uint32_t length)
450{
452 auto szFooterZip =
453 RNTupleCompressor::Zip(serializedFooter, length, GetWriteOptions().GetCompression(), bufFooterZip.get());
454 WriteNTupleFooter(bufFooterZip.get(), szFooterZip, length);
455 WriteNTupleAnchor();
456}
457
459{
460 fDaosContainer->WriteSingleAkey(
461 data, nbytes, daos_obj_id_t{kOidLowMetadata, static_cast<decltype(daos_obj_id_t::hi)>(fNTupleIndex)},
463 fNTupleAnchor.fLenHeader = lenHeader;
464 fNTupleAnchor.fNBytesHeader = nbytes;
465}
466
468{
469 fDaosContainer->WriteSingleAkey(
470 data, nbytes, daos_obj_id_t{kOidLowMetadata, static_cast<decltype(daos_obj_id_t::hi)>(fNTupleIndex)},
472 fNTupleAnchor.fLenFooter = lenFooter;
473 fNTupleAnchor.fNBytesFooter = nbytes;
474}
475
477{
480 fNTupleAnchor.Serialize(buffer.get());
481 fDaosContainer->WriteSingleAkey(
482 buffer.get(), ntplSize, daos_obj_id_t{kOidLowMetadata, static_cast<decltype(daos_obj_id_t::hi)>(fNTupleIndex)},
484}
485
486////////////////////////////////////////////////////////////////////////////////
487
489 const ROOT::RNTupleReadOptions &options)
490 : RPageSource(ntupleName, options),
491 fURI(uri),
492 fClusterPool(std::make_unique<ROOT::Internal::RClusterPool>(
493 *this, ROOT::Internal::RNTupleReadOptionsManip::GetClusterBunchSize(options)))
494{
495 EnableDefaultMetrics("RPageSourceDaos");
496
497 auto args = ParseDaosURI(uri);
498 auto pool = std::make_shared<RDaosPool>(args.fPoolLabel);
499 fDaosContainer = std::make_unique<RDaosContainer>(pool, args.fContainerLabel);
500}
501
503
506{
508 std::unique_ptr<unsigned char[]> buffer, zipBuffer;
509
510 auto [locator, descBuilder] = RDaosContainerNTupleLocator::LocateNTuple(*fDaosContainer, fNTupleName);
511 if (!locator.IsValid())
512 throw ROOT::RException(
513 R__FAIL("Attach: requested ntuple '" + fNTupleName + "' is not present in DAOS container."));
514
515 auto oclass = RDaosObject::ObjClassId(locator.fAnchor->fObjClass);
516 if (oclass.IsUnknown())
517 throw ROOT::RException(R__FAIL("Attach: unknown object class " + locator.fAnchor->fObjClass));
518
519 fDaosContainer->SetDefaultObjectClass(oclass);
520 fNTupleIndex = locator.GetIndex();
522
523 auto desc = descBuilder.MoveDescriptor();
524
525 for (const auto &cgDesc : desc.GetClusterGroupIterable()) {
526 buffer = MakeUninitArray<unsigned char>(cgDesc.GetPageListLength());
527 zipBuffer = MakeUninitArray<unsigned char>(cgDesc.GetPageListLocator().GetNBytesOnStorage());
528 fDaosContainer->ReadSingleAkey(
529 zipBuffer.get(), cgDesc.GetPageListLocator().GetNBytesOnStorage(), oidPageList, kDistributionKeyDefault,
530 cgDesc.GetPageListLocator().GetPosition<RNTupleLocatorObject64>().GetLocation(), kCidMetadata);
531 RNTupleDecompressor::Unzip(zipBuffer.get(), cgDesc.GetPageListLocator().GetNBytesOnStorage(),
532 cgDesc.GetPageListLength(), buffer.get());
533
534 RNTupleSerializer::DeserializePageList(buffer.get(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc, mode);
535 }
536
537 return desc;
538}
539
541{
542 return fDaosContainer->GetDefaultObjectClass().ToString();
543}
544
548{
549 const auto clusterId = localIndex.GetClusterId();
550
552 {
553 auto descriptorGuard = GetSharedDescriptorGuard();
554 const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId);
555 pageInfo = clusterDescriptor.GetPageRange(physicalColumnId).Find(localIndex.GetIndexInCluster());
556 }
557
558 sealedPage.SetBufferSize(pageInfo.GetLocator().GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum);
559 sealedPage.SetNElements(pageInfo.GetNElements());
560 sealedPage.SetHasChecksum(pageInfo.HasChecksum());
561 if (!sealedPage.GetBuffer())
562 return;
563
564 if (pageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) {
565 assert(!pageInfo.HasChecksum());
566 memcpy(const_cast<void *>(sealedPage.GetBuffer()), ROOT::Internal::RPage::GetPageZeroBuffer(),
567 sealedPage.GetBufferSize());
568 return;
569 }
570
571 if (pageInfo.GetLocator().GetReserved() & EDaosLocatorFlags::kCagedPage) {
572 // Suboptimal but hard to do differently: we load the full cage up to and including the requested page.
573 // In practice, individual LoadSealedPage calls are rare and usually full clusters are buffered.
574 // The support for extracting individual pages from a cage makes testing easier, however.
575 const auto [position, offset] =
576 DecodeDaosPagePosition(pageInfo.GetLocator().GetPosition<RNTupleLocatorObject64>());
577 RDaosKey daosKey = GetPageDaosKey<kDefaultDaosMapping>(fNTupleIndex, clusterId, physicalColumnId, position);
578 const auto bufSize = offset + sealedPage.GetBufferSize();
580 fDaosContainer->ReadSingleAkey(cageHeadBuffer.get(), bufSize, daosKey.fOid, daosKey.fDkey, daosKey.fAkey);
581 memcpy(const_cast<void *>(sealedPage.GetBuffer()), cageHeadBuffer.get() + offset, sealedPage.GetBufferSize());
582 } else {
583 RDaosKey daosKey =
585 pageInfo.GetLocator().GetPosition<RNTupleLocatorObject64>().GetLocation());
586 fDaosContainer->ReadSingleAkey(const_cast<void *>(sealedPage.GetBuffer()), sealedPage.GetBufferSize(),
587 daosKey.fOid, daosKey.fDkey, daosKey.fAkey);
588 }
589
590 sealedPage.VerifyChecksumIfEnabled().ThrowOnError();
591}
592
596{
597 const auto columnId = columnHandle.fPhysicalId;
598 const auto clusterId = clusterInfo.fClusterId;
599 const auto &pageInfo = clusterInfo.fPageInfo;
600
601 const auto element = columnHandle.fColumn->GetElement();
602 const auto elementSize = element->GetSize();
603 const auto elementInMemoryType = element->GetIdentifier().fInMemoryType;
604
605 if (pageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) {
606 auto pageZero = fPageAllocator->NewPage(elementSize, pageInfo.GetNElements());
607 pageZero.GrowUnchecked(pageInfo.GetNElements());
608 memset(pageZero.GetBuffer(), 0, pageZero.GetNBytes());
609 pageZero.SetWindow(clusterInfo.fColumnOffset + pageInfo.GetFirstElementIndex(),
611 return fPagePool.RegisterPage(std::move(pageZero),
613 }
614
616 sealedPage.SetNElements(pageInfo.GetNElements());
617 sealedPage.SetHasChecksum(pageInfo.HasChecksum());
618 sealedPage.SetBufferSize(pageInfo.GetLocator().GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum);
619 std::unique_ptr<unsigned char[]> directReadBuffer; // only used if cluster pool is turned off
620
621 if (fOptions.GetClusterCache() == ROOT::RNTupleReadOptions::EClusterCache::kOff) {
622 if (pageInfo.GetLocator().GetReserved() & EDaosLocatorFlags::kCagedPage) {
623 throw ROOT::RException(R__FAIL("accessing caged pages is only supported in conjunction with cluster cache"));
624 }
625
628 fNTupleIndex, clusterId, columnId, pageInfo.GetLocator().GetPosition<RNTupleLocatorObject64>().GetLocation());
629 fDaosContainer->ReadSingleAkey(directReadBuffer.get(), sealedPage.GetBufferSize(), daosKey.fOid, daosKey.fDkey,
630 daosKey.fAkey);
631 fCounters->fNPageRead.Inc();
632 fCounters->fNRead.Inc();
633 fCounters->fSzReadPayload.Add(sealedPage.GetBufferSize());
634 sealedPage.SetBuffer(directReadBuffer.get());
635 } else {
636 if (!fCurrentCluster || (fCurrentCluster->GetId() != clusterId) || !fCurrentCluster->ContainsColumn(columnId))
637 fCurrentCluster = fClusterPool->GetCluster(clusterId, fActivePhysicalColumns.ToColumnSet());
638 R__ASSERT(fCurrentCluster->ContainsColumn(columnId));
639
642 if (!cachedPageRef.Get().IsNull())
643 return cachedPageRef;
644
646 auto onDiskPage = fCurrentCluster->GetOnDiskPage(key);
647 R__ASSERT(onDiskPage && (sealedPage.GetBufferSize() == onDiskPage->GetSize()));
648 sealedPage.SetBuffer(onDiskPage->GetAddress());
649 }
650
652 {
653 Detail::RNTupleAtomicTimer timer(fCounters->fTimeWallUnzip, fCounters->fTimeCpuUnzip);
654 newPage = UnsealPage(sealedPage, *element).Unwrap();
655 fCounters->fSzUnzip.Add(elementSize * pageInfo.GetNElements());
656 }
657
658 newPage.SetWindow(clusterInfo.fColumnOffset + pageInfo.GetFirstElementIndex(),
660 fCounters->fNPageUnsealed.Inc();
661 return fPagePool.RegisterPage(std::move(newPage), ROOT::Internal::RPagePool::RKey{columnId, elementInMemoryType});
662}
663
664std::unique_ptr<ROOT::Internal::RPageSource> ROOT::Experimental::Internal::RPageSourceDaos::CloneImpl() const
665{
666 auto clone = new RPageSourceDaos(fNTupleName, fURI, fOptions);
667 return std::unique_ptr<RPageSourceDaos>(clone);
668}
669
670std::vector<std::unique_ptr<RCluster>>
672{
674 ROOT::DescriptorId_t fClusterId = 0;
675 ROOT::DescriptorId_t fColumnId = 0;
676 ROOT::NTupleSize_t fPageNo = 0;
677 std::uint64_t fPosition = 0;
678 std::uint64_t fCageOffset = 0;
679 std::uint64_t fDataSize = 0; // page payload
680 std::uint64_t fBufferSize = 0; // page payload + checksum (if available)
681 };
682
683 // Prepares read requests for a single cluster; `readRequests` is modified by this function. Requests are coalesced
684 // by OID and distribution key.
685 // TODO(jalopezg): this may be a private member function; that, however, requires additional changes given that
686 // `RDaosContainer::MultiObjectRWOperation_t` cannot be forward-declared
689 auto clusterId = clusterKey.fClusterId;
690 // Group page locators by their position in the object store; with caging enabled, this facilitates the
691 // processing of cages' requests together into a single IOV to be loaded.
692 std::unordered_map<std::uint32_t, std::vector<RDaosSealedPageLocator>> onDiskPages;
693
694 unsigned clusterBufSz = 0, nPages = 0;
695 auto pageZeroMap = std::make_unique<ROOT::Internal::ROnDiskPageMap>();
696 PrepareLoadCluster(
700 const auto &pageLocator = pageInfo.GetLocator();
701 uint32_t position, offset;
702 std::tie(position, offset) = DecodeDaosPagePosition(pageLocator.GetPosition<RNTupleLocatorObject64>());
703 auto [itLoc, _] = onDiskPages.emplace(position, std::vector<RDaosSealedPageLocator>());
704 auto pageBufferSize = pageLocator.GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum;
705
706 itLoc->second.push_back({clusterId, physicalColumnId, pageNo, position, offset,
707 pageLocator.GetNBytesOnStorage(), pageBufferSize});
708 ++nPages;
710 });
711
712 auto clusterBuffer = new unsigned char[clusterBufSz];
713 auto pageMap =
714 std::make_unique<ROOT::Internal::ROnDiskPageMapHeap>(std::unique_ptr<unsigned char[]>(clusterBuffer));
715
717 // Fill the cluster page map and the read requests for the RDaosContainer::ReadV() call
718 for (auto &[cageIndex, pageVec] : onDiskPages) {
719 auto columnId = pageVec[0].fColumnId; // All pages in a cage belong to the same column
720 std::size_t cageSz = 0;
721
722 for (auto &s : pageVec) {
723 assert(columnId == s.fColumnId);
724 assert(cageIndex == s.fPosition);
725 // Register the on disk pages in a page map
726 ROOT::Internal::ROnDiskPage::Key key(s.fColumnId, s.fPageNo);
727 pageMap->Register(key, ROOT::Internal::ROnDiskPage(cageBuffer + s.fCageOffset, s.fBufferSize));
728 cageSz += s.fBufferSize;
729 }
730
731 // Prepare new read request batched up by object ID and distribution key
732 d_iov_t iov;
734
738 itReq->second.Insert(daosKey.fAkey, iov);
739
741 }
742 fCounters->fNPageRead.Add(nPages);
743 fCounters->fSzReadPayload.Add(clusterBufSz);
744
745 auto cluster = std::make_unique<RCluster>(clusterId);
746 cluster->Adopt(std::move(pageMap));
747 cluster->Adopt(std::move(pageZeroMap));
748 for (auto colId : clusterKey.fPhysicalColumnSet)
749 cluster->SetColumnAvailable(colId);
750 return cluster;
751 };
752
753 fCounters->fNClusterLoaded.Add(clusterKeys.size());
754
755 std::vector<std::unique_ptr<ROOT::Internal::RCluster>> clusters;
757 for (auto key : clusterKeys) {
758 clusters.emplace_back(fnPrepareSingleCluster(key, readRequests));
759 }
760
761 {
762 Detail::RNTupleAtomicTimer timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
763 if (int err = fDaosContainer->ReadV(readRequests))
764 throw ROOT::RException(R__FAIL("ReadV: error" + std::string(d_errstr(err))));
765 }
766 fCounters->fNReadV.Inc();
767 fCounters->fNRead.Add(readRequests.size());
768
769 return clusters;
770}
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:304
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:300
#define R__LOG_WARNING(...)
Definition RLogger.hxx:358
#define h(i)
Definition RSha256.hxx:106
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char mode
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t bytes
UInt_t Hash(const TString &s)
Definition TString.h:502
#define _(A, B)
Definition cfortran.h:108
A RDaosContainer provides read/write access to objects in a given container.
Definition RDaos.hxx:157
RDaosObject::DistributionKey_t DistributionKey_t
Definition RDaos.hxx:160
std::unordered_map< ROidDkeyPair, RWOperation, ROidDkeyPair::Hash > MultiObjectRWOperation_t
Definition RDaos.hxx:231
RDaosObject::AttributeKey_t AttributeKey_t
Definition RDaos.hxx:161
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const ROOT::Internal::RPage &page) final
std::vector< RNTupleLocator > CommitSealedPageVImpl(std::span< RPageStorage::RSealedPageGroup > ranges, const std::vector< bool > &mask) final
Vector commit of preprocessed pages.
void WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter)
std::uint64_t StageClusterImpl() final
Returns the number of bytes written to storage (excluding metadata)
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
void WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader)
void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final
RPageSinkDaos(std::string_view ntupleName, std::string_view uri, const ROOT::RNTupleWriteOptions &options)
RNTupleLocator CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final
Storage provider that reads ntuple pages from a DAOS container.
std::string GetObjectClass() const
Return the object class used for user data OIDs in this ntuple.
std::unique_ptr< RPageSource > CloneImpl() const final
The cloned page source creates a new connection to the pool/container.
ROOT::Internal::RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final
std::vector< std::unique_ptr< ROOT::Internal::RCluster > > LoadClusters(std::span< ROOT::Internal::RCluster::RKey > clusterKeys) final
Populates all the pages of the given cluster ids and columns; it is possible that some columns do not...
void LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final
Read the packed and compressed bytes of a page into the memory buffer provided by sealedPage.
std::unique_ptr< RDaosContainer > fDaosContainer
A container that stores object data (header/footer, pages, etc.)
RPageSourceDaos(std::string_view ntupleName, std::string_view uri, const ROOT::RNTupleReadOptions &options)
ROOT::RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode) final
LoadStructureImpl() has been called before AttachImpl() is called
DAOS-specific user-tunable settings for storing ntuples.
Manages a set of clusters containing compressed and packed pages.
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:148
Helper class to compress data blocks in the ROOT compression frame format.
static std::size_t Zip(const void *from, std::size_t nbytes, int compression, void *to)
Returns the size of the compressed data, written into the provided output buffer.
Helper class to uncompress data blocks in the ROOT compression frame format.
static void Unzip(const void *from, size_t nbytes, size_t dataLen, void *to)
The nbytes parameter provides the size of the from buffer.
A helper class for piece-wise construction of an RNTupleDescriptor.
void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor, std::uint16_t versionPatch)
const RNTupleDescriptor & GetDescriptor() const
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
A helper class for serialization and deserialization of the RNTuple binary format.
static RResult< std::uint32_t > DeserializeString(const void *buffer, std::uint64_t bufSize, std::string &val)
static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val)
static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
static RResult< void > DeserializePageList(const void *buffer, std::uint64_t bufSize, ROOT::DescriptorId_t clusterGroupId, RNTupleDescriptor &desc, EDescriptorDeserializeMode mode)
static std::uint32_t SerializeString(const std::string &val, void *buffer)
static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
A page as being stored on disk, that is packed and compressed.
Definition RCluster.hxx:41
Base class for a sink with a physical storage backend.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSink.
Reference to a page stored in the page pool.
Abstract interface to read data from an ntuple.
void EnableDefaultMetrics(const std::string &prefix)
Enables the default set of metrics provided by RPageSource.
Stores information about the cluster in which this page resides.
Definition RPage.hxx:53
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:44
static const void * GetPageZeroBuffer()
Return a pointer to the page zero buffer used if there is no on-disk data for a particular deferred c...
Definition RPage.cxx:23
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
The on-storage metadata of an RNTuple.
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
RNTupleLocator payload that is common for object stores using 64bit location information.
std::uint64_t GetLocation() const
Generic information about the physical location of data.
Common user-tunable settings for reading RNTuples.
Common user-tunable settings for storing RNTuples.
std::size_t GetMaxUnzippedPageSize() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:198
const char * d_errstr(int rc)
static void d_iov_set(d_iov_t *iov, void *buf, size_t size)
Definition daos.h:50
uint16_t daos_oclass_id_t
Definition daos.h:135
@ OC_SX
Definition daos.h:129
ROOT::RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
std::unique_ptr< T[]> MakeUninitArray(std::size_t size)
Make an array of default-initialized elements.
Namespace for new ROOT classes and functions.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
A pair of <object ID, distribution key> that can be used to issue a fetch/update request for multiple...
Definition RDaos.hxx:166
Describes a read/write operation on multiple attribute keys under the same object ID and distribution...
Definition RDaos.hxx:190
Entry point for an RNTuple in a DAOS container.
std::uint32_t fNBytesFooter
The size of the compressed ntuple footer.
std::uint64_t fVersionAnchor
Allows for evolving the struct in future versions.
std::string fObjClass
The object class for user data OIDs, e.g. SX
std::uint16_t fVersionEpoch
Version of the binary format supported by the writer.
RResult< std::uint32_t > Deserialize(const void *buffer, std::uint32_t bufSize)
std::uint32_t fLenHeader
The size of the uncompressed ntuple header.
std::uint32_t fLenFooter
The size of the uncompressed ntuple footer.
std::uint32_t fNBytesHeader
The size of the compressed ntuple header.
static constexpr std::size_t kOCNameMaxLength
This limit is currently not defined in any header and any call to daos_oclass_id2name() within DAOS u...
Definition RDaos.hxx:108
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:152
On-disk pages within a page source are identified by the column and page number.
Definition RCluster.hxx:51
Summarizes cluster-level information that is necessary to load a certain page.
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Information about a single page in the context of a cluster's page range.
iovec for memory buffer
Definition daos.h:37
uint64_t hi
Definition daos.h:147
uint64_t lo
Definition daos.h:146
TMarker m
Definition textangle.C:8