Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPageSinkBuf.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageSinkBuf.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Max Orok <maxwellorok@gmail.com>
5/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
6/// \date 2021-03-17
7/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
8/// is welcome!
9
10/*************************************************************************
11 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
12 * All rights reserved. *
13 * *
14 * For the licensing terms see $ROOTSYS/LICENSE. *
15 * For the list of contributors see $ROOTSYS/README/CREDITS. *
16 *************************************************************************/
17
18#ifndef ROOT7_RPageSinkBuf
19#define ROOT7_RPageSinkBuf
20
22#include <ROOT/RPageStorage.hxx>
23
24#include <deque>
25#include <iterator>
26#include <memory>
27#include <tuple>
28
29namespace ROOT {
30namespace Experimental {
31namespace Internal {
32
33// clang-format off
34/**
35\class ROOT::Experimental::Internal::RPageSinkBuf
36\ingroup NTuple
37\brief Wrapper sink that coalesces cluster column page writes
38*/
39// clang-format on
40class RPageSinkBuf : public RPageSink {
41private:
42 /// A buffered column. The column is not responsible for RPage memory management (i.e. ReservePage),
43 /// which is handled by the enclosing RPageSinkBuf.
44 class RColumnBuf {
45 public:
46 struct RPageZipItem {
48 // Compression scratch buffer for fSealedPage.
49 std::unique_ptr<unsigned char[]> fBuf;
51 bool IsSealed() const { return fSealedPage != nullptr; }
52 void AllocateSealedPageBuf(std::size_t nBytes)
53 {
54 fBuf = std::unique_ptr<unsigned char[]>(new unsigned char[nBytes]);
55 }
56 };
57 public:
58 RColumnBuf() = default;
59 RColumnBuf(const RColumnBuf&) = delete;
60 RColumnBuf& operator=(const RColumnBuf&) = delete;
61 RColumnBuf(RColumnBuf&&) = default;
64
65 /// Returns a reference to the newly buffered page. The reference remains
66 /// valid until the return value of DrainBufferedPages() is destroyed.
68 {
69 if (!fCol) {
70 fCol = columnHandle;
71 }
72 // Safety: Insertion at the end of a deque never invalidates references
73 // to existing elements.
74 return fBufferedPages.emplace_back();
75 }
76 const RPageStorage::ColumnHandle_t &GetHandle() const { return fCol; }
77 bool IsEmpty() const { return fBufferedPages.empty(); }
78 bool HasSealedPagesOnly() const { return fBufferedPages.size() == fSealedPages.size(); }
80
81 using BufferedPages_t = std::tuple<std::deque<RPageZipItem>, RPageStorage::SealedPageSequence_t>;
82 /// When the return value of DrainBufferedPages() is destroyed, all references
83 /// returned by BufferPage() are invalidated.
84 /// This function gives up ownership of the buffered pages.
86 {
87 BufferedPages_t drained;
88 std::swap(fBufferedPages, std::get<decltype(fBufferedPages)>(drained));
89 std::swap(fSealedPages, std::get<decltype(fSealedPages)>(drained));
90 return drained;
91 }
92 void DropBufferedPages();
93
94 // The returned reference points to a default-constructed RSealedPage. It can be used
95 // to fill in data after sealing.
97 {
98 return fSealedPages.emplace_back();
99 }
100
101 private:
103 /// Using a deque guarantees that references to elements are never invalidated
104 /// by appends to the end of the deque by BufferPage.
105 std::deque<RPageZipItem> fBufferedPages;
106 /// Pages that have been already sealed by a concurrent task. A vector commit can be issued if all
107 /// buffered pages have been sealed.
108 /// Note that each RSealedPage refers to the same buffer as `fBufferedPages[i].fBuf` for some value of `i`, and
109 /// is thus owned by the corresponding RPageZipItem.
111 };
112
113private:
114 /// I/O performance counters that get registered in fMetrics
115 struct RCounters {
119 };
120 std::unique_ptr<RCounters> fCounters;
121 /// The inner sink, responsible for actually performing I/O.
122 std::unique_ptr<RPageSink> fInnerSink;
123 /// The buffered page sink maintains a copy of the RNTupleModel for the inner sink.
124 /// For the unbuffered case, the RNTupleModel is instead managed by a RNTupleWriter.
125 std::unique_ptr<RNTupleModel> fInnerModel;
126 /// Vector of buffered column pages. Indexed by column id.
127 std::vector<RColumnBuf> fBufferedColumns;
128 /// Columns committed as suppressed are stored and passed to the inner sink at cluster commit
129 std::vector<ColumnHandle_t> fSuppressedColumns;
132
133 void ConnectFields(const std::vector<RFieldBase *> &fields, NTupleSize_t firstEntry);
134
135public:
136 explicit RPageSinkBuf(std::unique_ptr<RPageSink> inner);
137 RPageSinkBuf(const RPageSinkBuf&) = delete;
141 ~RPageSinkBuf() override;
142
143 ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
144
145 const RNTupleDescriptor &GetDescriptor() const final;
146
147 void InitImpl(RNTupleModel &model) final;
148 void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final;
149 void UpdateExtraTypeInfo(const RExtraTypeInfoDescriptor &extraTypeInfo) final;
150
151 void CommitSuppressedColumn(ColumnHandle_t columnHandle) final;
152 void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final;
153 void CommitSealedPage(DescriptorId_t physicalColumnId, const RSealedPage &sealedPage) final;
154 void CommitSealedPageV(std::span<RPageStorage::RSealedPageGroup> ranges) final;
155 std::uint64_t CommitCluster(NTupleSize_t nNewEntries) final;
156 void CommitClusterGroup() final;
157 void CommitDatasetImpl() final;
158
159 RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
160}; // RPageSinkBuf
161
162} // namespace Internal
163} // namespace Experimental
164} // namespace ROOT
165
166#endif
A non-thread-safe integral performance counter.
A counter for CPU ticks that is either thread-safe or non-thread-safe.
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
Definition RColumn.hxx:42
std::tuple< std::deque< RPageZipItem >, RPageStorage::SealedPageSequence_t > BufferedPages_t
RPageZipItem & BufferPage(RPageStorage::ColumnHandle_t columnHandle)
Returns a reference to the newly buffered page.
RColumnBuf & operator=(const RColumnBuf &)=delete
const RPageStorage::ColumnHandle_t & GetHandle() const
RPageStorage::SealedPageSequence_t fSealedPages
Pages that have been already sealed by a concurrent task.
BufferedPages_t DrainBufferedPages()
When the return value of DrainBufferedPages() is destroyed, all references returned by BufferPage() are ...
std::deque< RPageZipItem > fBufferedPages
Using a deque guarantees that references to elements are never invalidated by appends to the end of the de...
RColumnBuf & operator=(RColumnBuf &&)=default
const RPageStorage::SealedPageSequence_t & GetSealedPages() const
Wrapper sink that coalesces cluster column page writes.
RPageSinkBuf & operator=(RPageSinkBuf &&)=default
std::uint64_t CommitCluster(NTupleSize_t nNewEntries) final
Finalize the current cluster and create a new one for the following data.
RPageSinkBuf(const RPageSinkBuf &)=delete
std::vector< RColumnBuf > fBufferedColumns
Vector of buffered column pages. Indexed by column id.
std::unique_ptr< RNTupleModel > fInnerModel
The buffered page sink maintains a copy of the RNTupleModel for the inner sink.
std::unique_ptr< RPageSink > fInnerSink
The inner sink, responsible for actually performing I/O.
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final
Get a new, empty page for the given column that can be filled with up to nElements; nElements must be...
void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges) final
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...
void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final
Incorporate incremental changes to the model into the ntuple descriptor.
const RNTupleDescriptor & GetDescriptor() const final
Return the RNTupleDescriptor being constructed.
std::unique_ptr< RCounters > fCounters
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
void InitImpl(RNTupleModel &model) final
void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final
Write a page to the storage. The column must have been added before.
void ConnectFields(const std::vector< RFieldBase * > &fields, NTupleSize_t firstEntry)
std::vector< ColumnHandle_t > fSuppressedColumns
Columns committed as suppressed are stored and passed to the inner sink at cluster commit.
void CommitSealedPage(DescriptorId_t physicalColumnId, const RSealedPage &sealedPage) final
Write a preprocessed page to storage. The column must have been added before.
void CommitClusterGroup() final
Write out the page locations (page list envelope) for all the committed clusters since the last call ...
void UpdateExtraTypeInfo(const RExtraTypeInfoDescriptor &extraTypeInfo) final
Adds an extra type information record to schema.
void CommitSuppressedColumn(ColumnHandle_t columnHandle) final
Commits a suppressed column for the current cluster.
RPageSinkBuf & operator=(const RPageSinkBuf &)=delete
Abstract interface to write data into an ntuple.
Common functionality of an ntuple storage for both reading and writing.
std::deque< RSealedPage > SealedPageSequence_t
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:46
Field-specific extra type information from the header / extension header.
The on-storage meta-data of an ntuple.
The RNTupleModel encapsulates the schema of an ntuple.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The incremental changes to a RNTupleModel
I/O performance counters that get registered in fMetrics.
Detail::RNTupleTickCounter< Detail::RNTuplePlainCounter > & fTimeCpuCriticalSection
A range of sealed pages referring to the same column that can be used for vector commit.
A sealed page contains the bytes of a page as written to storage (packed & compressed).