40 , fMetrics(
"RPageSinkRoot")
44 "Do not store real data with this version of RNTuple!";
54 , fMetrics(
"RPageSinkRoot")
58 "Do not store real data with this version of RNTuple!";
67 , fMetrics(
"RPageSinkRoot")
71 "Do not store real data with this version of RNTuple!";
72 fWriter = std::unique_ptr<Internal::RNTupleFileWriter>(
84 const auto &descriptor = fDescriptorBuilder.GetDescriptor();
85 auto szHeader = descriptor.SerializeHeader(
nullptr);
86 auto buffer = std::unique_ptr<unsigned char[]>(
new unsigned char[szHeader]);
87 descriptor.SerializeHeader(buffer.get());
89 auto zipBuffer = std::unique_ptr<unsigned char[]>(
new unsigned char[szHeader]);
90 auto szZipHeader = fCompressor(buffer.get(), szHeader, fOptions.GetCompression(),
91 [&zipBuffer](
const void *
b,
size_t n,
size_t o){ memcpy(zipBuffer.get() + o, b, n); } );
92 fWriter->WriteNTupleHeader(zipBuffer.get(), szZipHeader, szHeader);
99 unsigned char *buffer =
reinterpret_cast<unsigned char *
>(page.
GetBuffer());
100 bool isAdoptedBuffer =
true;
101 auto packedBytes = page.
GetSize();
103 const auto isMappable = element->
IsMappable();
106 packedBytes = (page.
GetNElements() * element->GetBitsOnStorage() + 7) / 8;
107 buffer =
new unsigned char[packedBytes];
108 isAdoptedBuffer =
false;
111 auto zippedBytes = packedBytes;
113 if (fOptions.GetCompression() != 0) {
114 zippedBytes = fCompressor(buffer, packedBytes, fOptions.GetCompression());
115 if (!isAdoptedBuffer)
117 buffer =
const_cast<unsigned char *
>(
reinterpret_cast<const unsigned char *
>(fCompressor.GetZipBuffer()));
118 isAdoptedBuffer =
true;
121 auto offsetData = fWriter->WriteBlob(buffer, zippedBytes, packedBytes);
122 fClusterMinOffset = std::min(offsetData, fClusterMinOffset);
123 fClusterMaxOffset = std::max(offsetData, fClusterMaxOffset);
125 if (!isAdoptedBuffer)
141 fClusterMinOffset = std::uint64_t(-1);
142 fClusterMaxOffset = 0;
149 const auto &descriptor = fDescriptorBuilder.GetDescriptor();
150 auto szFooter = descriptor.SerializeFooter(
nullptr);
151 auto buffer = std::unique_ptr<unsigned char []>(
new unsigned char[szFooter]);
152 descriptor.SerializeFooter(buffer.get());
154 auto zipBuffer = std::unique_ptr<unsigned char[]>(
new unsigned char[szFooter]);
155 auto szZipFooter = fCompressor(buffer.get(), szFooter, fOptions.GetCompression(),
156 [&zipBuffer](
const void *
b,
size_t n,
size_t o){ memcpy(zipBuffer.get() + o, b, n); } );
157 fWriter->WriteNTupleFooter(zipBuffer.get(), szZipFooter, szFooter);
166 nElements = kDefaultElementsPerPage;
168 return fPageAllocator->NewPage(columnHandle.
fId, elementSize, nElements);
173 fPageAllocator->DeletePage(page);
181 ColumnId_t columnId,
void *mem, std::size_t elementSize, std::size_t nElements)
183 RPage newPage(columnId, mem, elementSize * nElements, elementSize);
192 delete[]
reinterpret_cast<unsigned char *
>(page.
GetBuffer());
202 , fMetrics(
"RPageSourceFile")
204 , fPagePool(std::make_shared<
RPagePool>())
227 const auto fNTuple = fReader.GetNTuple(fNTupleName);
229 auto buffer = std::unique_ptr<unsigned char[]>(
new unsigned char[fNTuple.fLenHeader]);
230 auto zipBuffer = std::unique_ptr<unsigned char[]>(
new unsigned char[fNTuple.fNBytesHeader]);
231 fReader.ReadBuffer(zipBuffer.get(), fNTuple.fNBytesHeader, fNTuple.fSeekHeader);
232 fDecompressor(zipBuffer.get(), fNTuple.fNBytesHeader, fNTuple.fLenHeader, buffer.get());
235 buffer = std::unique_ptr<unsigned char[]>(
new unsigned char[fNTuple.fLenFooter]);
236 zipBuffer = std::unique_ptr<unsigned char[]>(
new unsigned char[fNTuple.fNBytesFooter]);
237 fReader.ReadBuffer(zipBuffer.get(), fNTuple.fNBytesFooter, fNTuple.fSeekFooter);
238 fDecompressor(zipBuffer.get(), fNTuple.fNBytesFooter, fNTuple.fLenFooter, buffer.get());
248 const auto columnId = columnHandle.
fId;
249 const auto clusterId = clusterDescriptor.
GetId();
250 const auto &pageRange = clusterDescriptor.
GetPageRange(columnId);
254 decltype(clusterIndex) firstInPage = 0;
255 for (
const auto &
pi : pageRange.fPageInfos) {
256 if (firstInPage +
pi.fNElements > clusterIndex) {
260 firstInPage +=
pi.fNElements;
266 const auto elementSize = element->
GetSize();
269 auto pageBuffer =
new unsigned char[
270 std::max(pageSize,
static_cast<std::uint32_t
>(elementSize * pageInfo.
fNElements))];
273 const auto bytesOnStorage = (element->GetBitsOnStorage() * pageInfo.
fNElements + 7) / 8;
274 if (pageSize != bytesOnStorage) {
275 fDecompressor(pageBuffer, pageSize, bytesOnStorage);
276 pageSize = bytesOnStorage;
279 if (!element->IsMappable()) {
281 auto unpackedBuffer =
new unsigned char[pageSize];
282 element->Unpack(unpackedBuffer, pageBuffer, pageInfo.
fNElements);
284 pageBuffer = unpackedBuffer;
288 auto newPage = fPageAllocator->NewPage(columnId, pageBuffer, elementSize, pageInfo.
fNElements);
290 fPagePool->RegisterPage(newPage,
302 const auto columnId = columnHandle.
fId;
303 auto cachedPage = fPagePool->GetPage(columnId, globalIndex);
304 if (!cachedPage.IsNull())
307 const auto clusterId = fDescriptor.FindClusterId(columnId, globalIndex);
309 const auto &clusterDescriptor = fDescriptor.GetClusterDescriptor(clusterId);
310 const auto selfOffset = clusterDescriptor.GetColumnRange(columnId).fFirstElementIndex;
312 return PopulatePageFromCluster(columnHandle, clusterDescriptor, globalIndex - selfOffset);
320 const auto index = clusterIndex.
GetIndex();
321 const auto columnId = columnHandle.
fId;
322 auto cachedPage = fPagePool->GetPage(columnId, clusterIndex);
323 if (!cachedPage.IsNull())
327 const auto &clusterDescriptor = fDescriptor.GetClusterDescriptor(clusterId);
328 return PopulatePageFromCluster(columnHandle, clusterDescriptor, index);
333 fPagePool->ReturnPage(page);
339 clone->fFile = fFile->Clone();
341 return std::unique_ptr<RPageSourceFile>(clone);
#define R__WARNING_HERE(GROUP)
virtual bool IsMappable() const
Derived, typed classes tell whether the on-storage layout is bitwise identical to the memory layout.
std::size_t GetSize() const
RColumnElementBase * GetElement() const
Manages pages read from a file.
static RPage NewPage(ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
static void DeletePage(const RPage &page)
Uses standard C++ memory allocation for the column data pages.
A closure that can free the memory associated with a mapped page.
A thread-safe cache of column pages.
RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options)
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements=0) final
Get a new, empty page for the given column that can be filled with up to nElements.
void CreateImpl(const RNTupleModel &model) final
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
void CommitDatasetImpl() final
std::unique_ptr< Internal::RNTupleFileWriter > fWriter
RClusterDescriptor::RLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
RClusterDescriptor::RLocator CommitClusterImpl(NTupleSize_t nEntries) final
Abstract interface to write data into an ntuple.
Storage provider that reads ntuple pages from a file.
Internal::RMiniFileReader fReader
Takes the fFile to read ntuple blobs from it.
virtual ~RPageSourceFile()
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
RNTupleDescriptor AttachImpl() final
std::unique_ptr< RPageSource > Clone() const final
The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options)
std::unique_ptr< ROOT::Internal::RRawFile > fFile
An RRawFile is used to request the necessary byte ranges from a local or a remote file.
RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType clusterIndex)
RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final
Allocates and fills a page that contains the index-th element.
Abstract interface to read data from an ntuple.
Stores information about the cluster in which this page resides.
A page is a slice of a column that is mapped into memory.
ClusterSize_t::ValueType GetNElements() const
void * TryGrow(ClusterSize_t::ValueType nElements)
Return a pointer after the last element that has space for nElements new elements.
ClusterSize_t::ValueType GetSize() const
The space taken by column elements in the buffer.
Read RNTuple data blocks from a TFile container, provided by an RRawFile.
static RNTupleFileWriter * Append(std::string_view ntupleName, TFile &file)
Add a new RNTuple identified by ntupleName to the existing TFile.
static RNTupleFileWriter * Recreate(std::string_view ntupleName, std::string_view path, int defaultCompression, ENTupleContainerFormat containerFormat)
Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
Meta-data for a set of ntuple clusters.
const RPageRange & GetPageRange(DescriptorId_t columnId) const
const RColumnRange & GetColumnRange(DescriptorId_t columnId) const
DescriptorId_t GetId() const
Addresses a column element or field item relative to a particular cluster, instead of a global NTupleSize_t index.
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
A helper class for piece-wise construction of an RNTupleDescriptor.
RNTupleDescriptor MoveDescriptor()
void SetFromHeader(void *headerBuffer)
void AddClustersFromFooter(void *footerBuffer)
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
Common user-tunable settings for storing ntuples.
int GetCompression() const
ENTupleContainerFormat GetContainerFormat() const
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
constexpr DescriptorId_t kInvalidDescriptorId
static constexpr double pi
NTupleSize_t fFirstElementIndex
A 64bit element index.
Generic information about the physical location of data.
std::uint32_t fBytesOnStorage