Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RCluster.hxx
Go to the documentation of this file.
1/// \file ROOT/RCluster.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2020-03-11
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RCluster
17#define ROOT7_RCluster
18
19#include <ROOT/RNTupleUtil.hxx>
20
21#include <cstdint>
22#include <memory>
23#include <unordered_map>
24#include <unordered_set>
25#include <utility>
26#include <vector>
27
28namespace ROOT {
29namespace Experimental {
30namespace Internal {
31
32// clang-format off
33/**
34\class ROnDiskPage
35\ingroup NTuple
36\brief A page as stored on disk, i.e. packed and compressed
37
38Used by the cluster pool to cache pages from the physical storage. Such pages generally need to be
39uncompressed and unpacked before they can be used by RNTuple upper layers.
40*/
41// clang-format on
43private:
44 /// The memory location of the bytes
45 const void *fAddress = nullptr;
46 /// The compressed and packed size of the page. This includes both payload and checksum (if present)
47 std::uint32_t fSize = 0;
48
49public:
50 /// On-disk pages within a page source are identified by the column and page number. The key is used for
51 /// associative collections of on-disk pages.
52 struct Key {
54 std::uint64_t fPageNo;
 /// Builds a key from the physical column id and the page number.
55 Key(DescriptorId_t columnId, std::uint64_t pageNo) : fPhysicalColumnId(columnId), fPageNo(pageNo) {}
 /// Two keys compare equal if both the physical column id and the page number match.
56 friend bool operator ==(const Key &lhs, const Key &rhs) {
57 return lhs.fPhysicalColumnId == rhs.fPhysicalColumnId && lhs.fPageNo == rhs.fPageNo;
58 }
59 };
60
 /// Creates a null page: the address stays nullptr and the size stays zero (see the member initializers).
61 ROnDiskPage() = default;
 /// Describes a page of `size` bytes located at `address`. Only the pointer is stored; the bytes are not copied.
62 ROnDiskPage(void *address, std::uint32_t size) : fAddress(address), fSize(size) {}
63
 /// Returns the stored memory location of the page bytes (nullptr for a null page).
64 const void *GetAddress() const { return fAddress; }
 /// Returns the stored size of the page in bytes.
65 std::uint32_t GetSize() const { return fSize; }
66
 /// True if the page does not reference any memory, i.e. the stored address is nullptr.
67 bool IsNull() const { return fAddress == nullptr; }
68}; // class ROnDiskPage
69
70} // namespace Internal
71} // namespace Experimental
72} // namespace ROOT
73
74// For hash maps ROnDiskPage::Key --> ROnDiskPage
75namespace std
76{
77template <>
78struct hash<ROOT::Experimental::Internal::ROnDiskPage::Key> {
79 // TODO(jblomer): quick and dirty hash, likely very sub-optimal, to be revised later.
 // NOTE(review): the line declaring the call operator (listing line 80) is missing from this extract.
 // The visible body XORs the hash of the physical column id with the hash of the page number shifted
 // left by one bit, and then shifts the combined value right by one bit.
81 {
82 return ((std::hash<ROOT::Experimental::DescriptorId_t>()(key.fPhysicalColumnId) ^
83 (hash<ROOT::Experimental::NTupleSize_t>()(key.fPageNo) << 1)) >>
84 1);
85 }
86};
87}
88
89
90namespace ROOT {
91namespace Experimental {
92namespace Internal {
93
94// clang-format off
95/**
96\class ROOT::Experimental::Internal::ROnDiskPageMap
97\ingroup NTuple
98\brief A memory region that contains packed and compressed pages
99
100Derived classes implement how the on-disk pages are stored in memory, e.g. mmap'd or in a special area.
101*/
102// clang-format on
 /// RCluster is a friend and can therefore access the private page table fOnDiskPages.
104 friend class RCluster;
105
106private:
 /// Lookup table from the page key to the on-disk page registered under that key
107 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
108
109public:
110 ROnDiskPageMap() = default;
 /// Copy construction and copy assignment are deleted; move construction is defaulted.
111 ROnDiskPageMap(const ROnDiskPageMap &other) = delete;
112 ROnDiskPageMap(ROnDiskPageMap &&other) = default;
113 ROnDiskPageMap &operator =(const ROnDiskPageMap &other) = delete;
116
117 /// Inserts information about a page stored in fMemory. Therefore, the address referenced by onDiskPage
118 /// needs to be owned by the page map (see derived classes). If a page map contains a page of a given column,
119 /// it is expected that _all_ the pages of that column in that cluster are part of the page map.
 /// The page is added with emplace(), so an entry already registered under the same key is kept unchanged.
120 void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage) { fOnDiskPages.emplace(key, onDiskPage); }
121}; // class ROnDiskPageMap
122
123// clang-format off
124/**
125\class ROOT::Experimental::Internal::ROnDiskPageMapHeap
126\ingroup NTuple
127\brief An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
128*/
129// clang-format on
131private:
132 /// The memory region containing the on-disk pages.
133 std::unique_ptr<unsigned char []> fMemory;
134public:
 /// Takes ownership of the given buffer by moving it into fMemory.
135 explicit ROnDiskPageMapHeap(std::unique_ptr<unsigned char []> memory) : fMemory(std::move(memory)) {}
141}; // class ROnDiskPageMapHeap
142
143// clang-format off
144/**
145\class ROOT::Experimental::Internal::RCluster
146\ingroup NTuple
147\brief An in-memory subset of the packed and compressed pages of a cluster
148
149Binds together several page maps that represent all the pages of certain columns of a cluster
150*/
151// clang-format on
152class RCluster {
153public:
 /// A set of column identifiers
154 using ColumnSet_t = std::unordered_set<DescriptorId_t>;
155 /// The identifiers that specify the content of a (partial) cluster
156 struct RKey {
159 };
160
161protected:
162 /// References the cluster identifier in the page source that created the cluster
164 /// Multiple page maps can be combined in a single RCluster
165 std::vector<std::unique_ptr<ROnDiskPageMap>> fPageMaps;
166 /// Set of the (complete) columns represented by the RCluster
168 /// Lookup table for the on-disk pages
169 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
170
171public:
 /// Creates a cluster for the given cluster id; the page maps and the page lookup table start out empty.
172 explicit RCluster(DescriptorId_t clusterId) : fClusterId(clusterId) {}
 /// Clusters are move-only: copying is deleted, moving is defaulted.
173 RCluster(const RCluster &other) = delete;
174 RCluster(RCluster &&other) = default;
175 RCluster &operator =(const RCluster &other) = delete;
176 RCluster &operator =(RCluster &&other) = default;
177 ~RCluster() = default;
178
179 /// Move the given page map into this cluster; for on-disk pages that are present in both the cluster at hand and
180 /// pageMap, GetOnDiskPage() may return the page from either of the memory regions (left to the implementation).
181 /// Their content is supposed to be the same.
182 /// Page maps cannot be physically merged because they have potentially used different allocation mechanisms
183 /// (e.g. mmap vs. malloc).
184 void Adopt(std::unique_ptr<ROnDiskPageMap> pageMap);
185 /// Move the contents of other into this cluster; for on-disk pages that are present in both the cluster at hand and
186 /// the "other" cluster, GetOnDiskPage() may return the page from either of the memory regions
187 /// (left to the implementation).
188 void Adopt(RCluster &&other);
189 /// Marks the column as complete; must be done for all columns, even empty ones without associated pages,
190 /// before the cluster is given from the page storage to the cluster pool. Marking the available columns is
191 /// typically the last step of RPageSource::LoadCluster().
192 void SetColumnAvailable(DescriptorId_t physicalColumnId);
 /// Looks up the on-disk page for the given key.
 /// NOTE(review): defined in RCluster.cxx; presumably returns nullptr if the key is unknown -- confirm there.
193 const ROnDiskPage *GetOnDiskPage(const ROnDiskPage::Key &key) const;
194
 /// Returns the cluster id passed to the constructor.
195 DescriptorId_t GetId() const { return fClusterId; }
 /// True if colId is contained in the set of available physical columns.
197 bool ContainsColumn(DescriptorId_t colId) const { return fAvailPhysicalColumns.count(colId) > 0; }
 /// Returns the number of entries in the on-disk page lookup table.
198 size_t GetNOnDiskPages() const { return fOnDiskPages.size(); }
199}; // class RCluster
200
201} // namespace Internal
202} // namespace Experimental
203} // namespace ROOT
204
205#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
TRObject operator()(const T1 &t1) const
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:152
DescriptorId_t fClusterId
References the cluster identifier in the page source that created the cluster.
Definition RCluster.hxx:163
void SetColumnAvailable(DescriptorId_t physicalColumnId)
Marks the column as complete; must be done for all columns, even empty ones without associated pages,...
Definition RCluster.cxx:63
RCluster(DescriptorId_t clusterId)
Definition RCluster.hxx:172
RCluster(const RCluster &other)=delete
RCluster(RCluster &&other)=default
bool ContainsColumn(DescriptorId_t colId) const
Definition RCluster.hxx:197
const ColumnSet_t & GetAvailPhysicalColumns() const
Definition RCluster.hxx:196
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Lookup table for the on-disk pages.
Definition RCluster.hxx:169
const ROnDiskPage * GetOnDiskPage(const ROnDiskPage::Key &key) const
Definition RCluster.cxx:32
std::vector< std::unique_ptr< ROnDiskPageMap > > fPageMaps
Multiple page maps can be combined in a single RCluster.
Definition RCluster.hxx:165
RCluster & operator=(const RCluster &other)=delete
std::unordered_set< DescriptorId_t > ColumnSet_t
Definition RCluster.hxx:154
ColumnSet_t fAvailPhysicalColumns
Set of the (complete) columns represented by the RCluster.
Definition RCluster.hxx:167
void Adopt(std::unique_ptr< ROnDiskPageMap > pageMap)
Move the given page map into this cluster; for on-disk pages that are present in both the cluster at ...
Definition RCluster.cxx:40
An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
Definition RCluster.hxx:130
ROnDiskPageMapHeap(std::unique_ptr< unsigned char[]> memory)
Definition RCluster.hxx:135
ROnDiskPageMapHeap & operator=(const ROnDiskPageMapHeap &other)=delete
std::unique_ptr< unsigned char[]> fMemory
The memory region containing the on-disk pages.
Definition RCluster.hxx:133
ROnDiskPageMapHeap(const ROnDiskPageMapHeap &other)=delete
ROnDiskPageMapHeap(ROnDiskPageMapHeap &&other)=default
A memory region that contains packed and compressed pages.
Definition RCluster.hxx:103
ROnDiskPageMap(const ROnDiskPageMap &other)=delete
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Definition RCluster.hxx:107
void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage)
Inserts information about a page stored in fMemory.
Definition RCluster.hxx:120
ROnDiskPageMap & operator=(const ROnDiskPageMap &other)=delete
ROnDiskPageMap(ROnDiskPageMap &&other)=default
A page as stored on disk, i.e. packed and compressed.
Definition RCluster.hxx:42
std::uint32_t fSize
The compressed and packed size of the page. This includes both payload and checksum (if present)
Definition RCluster.hxx:47
const void * fAddress
The memory location of the bytes.
Definition RCluster.hxx:45
ROnDiskPage(void *address, std::uint32_t size)
Definition RCluster.hxx:62
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:156
On-disk pages within a page source are identified by the column and page number.
Definition RCluster.hxx:52
Key(DescriptorId_t columnId, std::uint64_t pageNo)
Definition RCluster.hxx:55
friend bool operator==(const Key &lhs, const Key &rhs)
Definition RCluster.hxx:56