Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RPageSinkBuf.hxx
Go to the documentation of this file.
1/// \file ROOT/RPageSinkBuf.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Max Orok <maxwellorok@gmail.com>
5/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
6/// \date 2021-03-17
7/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
8/// is welcome!
9
10/*************************************************************************
11 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
12 * All rights reserved. *
13 * *
14 * For the licensing terms see $ROOTSYS/LICENSE. *
15 * For the list of contributors see $ROOTSYS/README/CREDITS. *
16 *************************************************************************/
17
18#ifndef ROOT7_RPageSinkBuf
19#define ROOT7_RPageSinkBuf
20
22#include <ROOT/RPageStorage.hxx>
23
24#include <deque>
25#include <iterator>
26#include <memory>
27#include <tuple>
28
29namespace ROOT {
30namespace Experimental {
31namespace Detail {
32
33// clang-format off
34/**
35\class ROOT::Experimental::Detail::RPageSinkBuf
36\ingroup NTuple
37\brief Wrapper sink that coalesces cluster column page writes
38*
39* TODO(jblomer): The interplay of derived class and RPageSink is not yet optimally designed for page storage wrapper
40* classes like this one. Header and footer serialization, e.g., are done twice. To be revised.
41*/
42// clang-format on
43class RPageSinkBuf : public RPageSink {
44private:
45 /// A buffered column. The column is not responsible for RPage memory management (i.e.
46 /// ReservePage/ReleasePage), which is handled by the enclosing RPageSinkBuf.
47 class RColumnBuf {
48 public:
 /// One buffered page together with the scratch buffer used when the page is sealed (compressed).
49 struct RPageZipItem {
51 // Compression scratch buffer for fSealedPage.
52 std::unique_ptr<unsigned char[]> fBuf;
 /// Wraps `page`; no scratch buffer is allocated yet (fBuf starts out null).
54 explicit RPageZipItem(RPage page)
55 : fPage(page), fBuf(nullptr) {}
 /// True once fSealedPage is non-null.
56 bool IsSealed() const { return fSealedPage != nullptr; }
 /// Allocates a scratch buffer of `fPage.GetNBytes()` bytes and stores it in fBuf,
 /// replacing (and thereby freeing) any buffer fBuf held before.
57 void AllocateSealedPageBuf() { fBuf = std::unique_ptr<unsigned char[]>(new unsigned char[fPage.GetNBytes()]); }
58 };
59 public:
 // RColumnBuf cannot be copied, only moved.
60 RColumnBuf() = default;
61 RColumnBuf(const RColumnBuf&) = delete;
62 RColumnBuf& operator=(const RColumnBuf&) = delete;
63 RColumnBuf(RColumnBuf&&) = default;
66
67 /// Returns a reference to the newly buffered page. The reference remains
68 /// valid until the return value of DrainBufferedPages() is destroyed.
69 /// Note that `BufferPage()` yields the ownership of `page` to RColumnBuf.
71 RPageStorage::ColumnHandle_t columnHandle, const RPage &page)
72 {
 // Record the column handle the first time a page is buffered, i.e. while fCol still tests false.
73 if (!fCol) {
74 fCol = columnHandle;
75 }
76 // Safety: Insertion at the end of a deque never invalidates references
77 // to existing elements.
78 fBufferedPages.push_back(RPageZipItem(page));
79 return fBufferedPages.back();
80 }
 /// Returns the column handle stored in fCol.
81 const RPageStorage::ColumnHandle_t &GetHandle() const { return fCol; }
 /// True if no pages are currently buffered for this column.
82 bool IsEmpty() const { return fBufferedPages.empty(); }
 /// True if there are exactly as many sealed pages as buffered pages.
83 bool HasSealedPagesOnly() const { return fBufferedPages.size() == fSealedPages.size(); }
85
 /// The buffered pages and the sequence of sealed pages, as handed out by DrainBufferedPages().
86 using BufferedPages_t = std::tuple<std::deque<RPageZipItem>, RPageStorage::SealedPageSequence_t>;
87 /// When the return value of DrainBufferedPages() is destroyed, all references
88 /// returned by BufferPage() are invalidated.
89 /// This function gives up on the ownership of the buffered pages. Thus, `ReleasePage()` must be called
90 /// accordingly.
92 {
 // `drained` starts out with empty containers; swapping moves the current contents into it
 // and leaves both members of this column empty.
93 BufferedPages_t drained;
94 std::swap(fBufferedPages, std::get<decltype(fBufferedPages)>(drained));
95 std::swap(fSealedPages, std::get<decltype(fSealedPages)>(drained));
96 return drained;
97 }
 // Defined out of line.
 // NOTE(review): presumably discards the buffered pages without committing them -- confirm in the implementation.
98 void DropBufferedPages();
99
100 // The returned reference points to a default-constructed RSealedPage. It can be used
101 // to fill in data after sealing.
103 {
104 return fSealedPages.emplace_back();
105 }
106
107 private:
109 /// Using a deque guarantees that references to elements are never invalidated
110 /// by appends to the end of the deque in BufferPage().
111 std::deque<RPageZipItem> fBufferedPages;
112 /// Pages that have been already sealed by a concurrent task. A vector commit can be issued if all
113 /// buffered pages have been sealed.
114 /// Note that each RSealedPage refers to the same buffer as `fBufferedPages[i].fBuf` for some value of `i`, and
115 /// is thus owned by the corresponding RPageZipItem.
117 };
118
119private:
120 /// I/O performance counters that get registered in fMetrics
121 struct RCounters {
123 };
124 std::unique_ptr<RCounters> fCounters;
126 /// The inner sink, responsible for actually performing I/O.
127 std::unique_ptr<RPageSink> fInnerSink;
128 /// The buffered page sink maintains a copy of the RNTupleModel for the inner sink.
129 /// For the unbuffered case, the RNTupleModel is instead managed by a RNTupleWriter.
130 std::unique_ptr<RNTupleModel> fInnerModel;
131 /// Vector of buffered column pages. Indexed by column id.
132 std::vector<RColumnBuf> fBufferedColumns;
133
134protected:
 // Overrides of the RPageSink implementation hooks. All are `final` and defined out of line.
135 void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length) final;
136 RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
137 RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RSealedPage &sealedPage) final;
138 std::uint64_t CommitClusterImpl(NTupleSize_t nEntries) final;
139 RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final;
140 void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
141
142public:
 /// Takes ownership of `inner`, the sink being wrapped.
143 explicit RPageSinkBuf(std::unique_ptr<RPageSink> inner);
 /// RPageSinkBuf cannot be copied.
144 RPageSinkBuf(const RPageSinkBuf&) = delete;
148 ~RPageSinkBuf() override;
149
150 void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final;
151 RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
152 void ReleasePage(RPage &page) final;
153
 /// Returns the fMetrics object of this sink.
154 RNTupleMetrics &GetMetrics() final { return fMetrics; }
155};
156
157} // namespace Detail
158} // namespace Experimental
159} // namespace ROOT
160
161#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
A collection of Counter objects with a name, a unit, and a description.
A non thread-safe integral performance counter.
const RPageStorage::ColumnHandle_t & GetHandle() const
RPageZipItem & BufferPage(RPageStorage::ColumnHandle_t columnHandle, const RPage &page)
Returns a reference to the newly buffered page.
RColumnBuf & operator=(const RColumnBuf &)=delete
const RPageStorage::SealedPageSequence_t & GetSealedPages() const
RColumnBuf & operator=(RColumnBuf &&)=default
RPageStorage::SealedPageSequence_t fSealedPages
Pages that have been already sealed by a concurrent task.
std::deque< RPageZipItem > fBufferedPages
Using a deque guarantees that element iterators are never invalidated by appends to the end of the it...
BufferedPages_t DrainBufferedPages()
When the return value of DrainBufferedPages() is destroyed, all references returned by GetBuffer are ...
std::tuple< std::deque< RPageZipItem >, RPageStorage::SealedPageSequence_t > BufferedPages_t
Wrapper sink that coalesces cluster column page writes.
void ReleasePage(RPage &page) final
Every page store needs to be able to free pages it handed out.
std::unique_ptr< RCounters > fCounters
RPageSinkBuf(RPageSinkBuf &&)=default
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final
Get a new, empty page for the given column that can be filled with up to nElements.
void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final
Incorporate incremental changes to the model into the ntuple descriptor.
RPageSinkBuf & operator=(RPageSinkBuf &&)=default
RPageSinkBuf(const RPageSinkBuf &)=delete
std::unique_ptr< RPageSink > fInnerSink
The inner sink, responsible for actually performing I/O.
RNTupleMetrics & GetMetrics() final
Returns the default metrics object. Subclasses might alternatively provide their own metrics object b...
RPageSinkBuf & operator=(const RPageSinkBuf &)=delete
std::vector< RColumnBuf > fBufferedColumns
Vector of buffered column pages. Indexed by column id.
void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final
std::uint64_t CommitClusterImpl(NTupleSize_t nEntries) final
Returns the number of bytes written to storage (excluding metadata)
RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final
Returns the locator of the page list envelope of the given buffer that contains the serialized page l...
RNTupleLocator CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RSealedPage &sealedPage) final
void CreateImpl(const RNTupleModel &model, unsigned char *serializedHeader, std::uint32_t length) final
std::unique_ptr< RNTupleModel > fInnerModel
The buffered page sink maintains a copy of the RNTupleModel for the inner sink.
RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final
Abstract interface to write data into an ntuple.
std::deque< RSealedPage > SealedPageSequence_t
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:42
std::uint32_t GetNBytes() const
The space taken by column elements in the buffer.
Definition RPage.hxx:84
The RNTupleModel encapsulates the schema of an ntuple.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
The incremental changes to a RNTupleModel
I/O performance counters that get registered in fMetrics.
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Generic information about the physical location of data.