Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RCluster.hxx
Go to the documentation of this file.
1/// \file ROOT/RCluster.hxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \date 2020-03-11
4/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
5/// is welcome!
6
7/*************************************************************************
8 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#ifndef ROOT_RCluster
16#define ROOT_RCluster
17
18#include <ROOT/RNTupleTypes.hxx>
19
20#include <cstdint>
21#include <memory>
22#include <unordered_map>
23#include <unordered_set>
24#include <utility>
25#include <vector>
26
27namespace ROOT {
28namespace Internal {
29
30// clang-format off
31/**
32\class ROOT::Internal::ROnDiskPage
33\ingroup NTuple
34\brief A page as stored on disk, i.e. packed and compressed
35
36Used by the cluster pool to cache pages from the physical storage. Such pages generally need to be
37uncompressed and unpacked before they can be used by RNTuple upper layers.
38*/
39// clang-format on
41private:
42 /// The memory location of the bytes
43 const void *fAddress = nullptr;
44 /// The compressed and packed size of the page. This includes both payload and checksum (if present)
45 std::uint32_t fSize = 0;
46
47public:
48 /// On-disk pages within a page source are identified by the column and page number. The key is used for
49 /// associative collections of on-disk pages.
50 struct Key {
52 std::uint64_t fPageNo;
53 Key(ROOT::DescriptorId_t columnId, std::uint64_t pageNo) : fPhysicalColumnId(columnId), fPageNo(pageNo) {}
54 friend bool operator ==(const Key &lhs, const Key &rhs) {
55 return lhs.fPhysicalColumnId == rhs.fPhysicalColumnId && lhs.fPageNo == rhs.fPageNo;
56 }
57 };
58
59 ROnDiskPage() = default;
60 ROnDiskPage(void *address, std::uint32_t size) : fAddress(address), fSize(size) {}
61
62 const void *GetAddress() const { return fAddress; }
63 std::uint32_t GetSize() const { return fSize; }
64
65 bool IsNull() const { return fAddress == nullptr; }
66}; // class ROnDiskPage
67
68} // namespace Internal
69} // namespace ROOT
70
71// For hash maps ROnDiskPage::Key --> ROnDiskPage
72namespace std
73{
74template <>
75struct hash<ROOT::Internal::ROnDiskPage::Key> {
76 // TODO(jblomer): quick and dirty hash, likely very sub-optimal, to be revised later.
77 size_t operator()(const ROOT::Internal::ROnDiskPage::Key &key) const
78 {
79 return (
80 (std::hash<ROOT::DescriptorId_t>()(key.fPhysicalColumnId) ^ (hash<ROOT::NTupleSize_t>()(key.fPageNo) << 1)) >>
81 1);
82 }
83};
84}
85
86namespace ROOT {
87namespace Internal {
88
89// clang-format off
90/**
91\class ROOT::Internal::ROnDiskPageMap
92\ingroup NTuple
93\brief A memory region that contains packed and compressed pages
94
95Derived classes implement how the on-disk pages are stored in memory, e.g. mmap'd or in a special area.
96*/
97// clang-format on
99 friend class RCluster;
100
101private:
102 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
103
104public:
105 ROnDiskPageMap() = default;
106 ROnDiskPageMap(const ROnDiskPageMap &other) = delete;
107 ROnDiskPageMap(ROnDiskPageMap &&other) = default;
108 ROnDiskPageMap &operator =(const ROnDiskPageMap &other) = delete;
111
112 /// Inserts information about a page stored in fMemory. Therefore, the address referenced by onDiskPage
113 /// needs to be owned by the page map (see derived classes). If a page map contains a page of a given column,
114 /// it is expected that _all_ the pages of that column in that cluster are part of the page map.
115 void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage) { fOnDiskPages.emplace(key, onDiskPage); }
116}; // class ROnDiskPageMap
117
118// clang-format off
119/**
120\class ROOT::Internal::ROnDiskPageMapHeap
121\ingroup NTuple
122\brief An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
123*/
124// clang-format on
126private:
127 /// The memory region containing the on-disk pages.
128 std::unique_ptr<unsigned char []> fMemory;
129public:
130 explicit ROnDiskPageMapHeap(std::unique_ptr<unsigned char []> memory) : fMemory(std::move(memory)) {}
136}; // class ROnDiskPageMapHeap
137
138// clang-format off
139/**
140\class ROOT::Internal::RCluster
141\ingroup NTuple
142\brief An in-memory subset of the packed and compressed pages of a cluster
143
144Binds together several page maps that represent all the pages of certain columns of a cluster
145*/
146// clang-format on
147class RCluster {
148public:
149 using ColumnSet_t = std::unordered_set<ROOT::DescriptorId_t>;
150 /// The identifiers that specify the content of a (partial) cluster
155
156protected:
157 /// References the cluster identifier in the page source that created the cluster
159 /// Multiple page maps can be combined in a single RCluster
160 std::vector<std::unique_ptr<ROnDiskPageMap>> fPageMaps;
161 /// Set of the (complete) columns represented by the RCluster
163 /// Lookup table for the on-disk pages
164 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
165
166public:
167 explicit RCluster(ROOT::DescriptorId_t clusterId) : fClusterId(clusterId) {}
168 RCluster(const RCluster &other) = delete;
169 RCluster(RCluster &&other) = default;
170 RCluster &operator =(const RCluster &other) = delete;
171 RCluster &operator =(RCluster &&other) = default;
172 ~RCluster() = default;
173
174 /// Move the given page map into this cluster; for on-disk pages that are present in both the cluster at hand and
175 /// pageMap, GetOnDiskPage() may return the page from either of the memory regions (left to the implementation).
176 /// Their content is supposed to be the same.
177 /// Page maps cannot be physically merged because they have potentially used different allocation mechanisms
178 /// (e.g. mmap vs. malloc).
179 void Adopt(std::unique_ptr<ROnDiskPageMap> pageMap);
180 /// Move the contents of other into this cluster; for on-disk pages that are present in both the cluster at hand and
181 /// the "other" cluster, GetOnDiskPage() may return the page from either of the memory regions
182 /// (left to the implementation).
183 void Adopt(RCluster &&other);
184 /// Marks the column as complete; must be done for all columns, even empty ones without associated pages,
185 /// before the cluster is given from the page storage to the cluster pool. Marking the available columns is
186 /// typically the last step of RPageSource::LoadCluster().
187 void SetColumnAvailable(ROOT::DescriptorId_t physicalColumnId);
188 const ROnDiskPage *GetOnDiskPage(const ROnDiskPage::Key &key) const;
189
192 bool ContainsColumn(ROOT::DescriptorId_t colId) const { return fAvailPhysicalColumns.count(colId) > 0; }
193 size_t GetNOnDiskPages() const { return fOnDiskPages.size(); }
194}; // class RCluster
195
196} // namespace Internal
197} // namespace ROOT
198
199#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
TRObject operator()(const T1 &t1) const
RCluster(ROOT::DescriptorId_t clusterId)
Definition RCluster.hxx:167
std::vector< std::unique_ptr< ROnDiskPageMap > > fPageMaps
Multiple page maps can be combined in a single RCluster.
Definition RCluster.hxx:160
ColumnSet_t fAvailPhysicalColumns
Set of the (complete) columns represented by the RCluster.
Definition RCluster.hxx:162
std::unordered_set< ROOT::DescriptorId_t > ColumnSet_t
Definition RCluster.hxx:149
const ColumnSet_t & GetAvailPhysicalColumns() const
Definition RCluster.hxx:191
bool ContainsColumn(ROOT::DescriptorId_t colId) const
Definition RCluster.hxx:192
ROOT::DescriptorId_t fClusterId
References the cluster identifier in the page source that created the cluster.
Definition RCluster.hxx:158
RCluster(ROOT::DescriptorId_t clusterId)
Definition RCluster.hxx:167
void SetColumnAvailable(ROOT::DescriptorId_t physicalColumnId)
Marks the column as complete; must be done for all columns, even empty ones without associated pages,...
Definition RCluster.cxx:61
const ROnDiskPage * GetOnDiskPage(const ROnDiskPage::Key &key) const
Definition RCluster.cxx:30
RCluster(const RCluster &other)=delete
RCluster(RCluster &&other)=default
void Adopt(std::unique_ptr< ROnDiskPageMap > pageMap)
Move the given page map into this cluster; for on-disk pages that are present in both the cluster at ...
Definition RCluster.cxx:38
size_t GetNOnDiskPages() const
Definition RCluster.hxx:193
RCluster & operator=(const RCluster &other)=delete
ROOT::DescriptorId_t GetId() const
Definition RCluster.hxx:190
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Lookup table for the on-disk pages.
Definition RCluster.hxx:164
ROnDiskPageMapHeap & operator=(const ROnDiskPageMapHeap &other)=delete
std::unique_ptr< unsigned char[]> fMemory
The memory region containing the on-disk pages.
Definition RCluster.hxx:128
ROnDiskPageMapHeap(ROnDiskPageMapHeap &&other)=default
ROnDiskPageMapHeap(std::unique_ptr< unsigned char[]> memory)
Definition RCluster.hxx:130
ROnDiskPageMapHeap(const ROnDiskPageMapHeap &other)=delete
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Definition RCluster.hxx:102
ROnDiskPageMap(ROnDiskPageMap &&other)=default
void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage)
Inserts information about a page stored in fMemory.
Definition RCluster.hxx:115
ROnDiskPageMap(const ROnDiskPageMap &other)=delete
ROnDiskPageMap & operator=(const ROnDiskPageMap &other)=delete
A page as stored on disk, i.e. packed and compressed.
Definition RCluster.hxx:40
std::uint32_t fSize
The compressed and packed size of the page. This includes both payload and checksum (if present).
Definition RCluster.hxx:45
ROnDiskPage(void *address, std::uint32_t size)
Definition RCluster.hxx:60
const void * fAddress
The memory location of the bytes.
Definition RCluster.hxx:43
std::uint32_t GetSize() const
Definition RCluster.hxx:63
const void * GetAddress() const
Definition RCluster.hxx:62
ROnDiskPageMap()=default
ROnDiskPage()=default
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:151
ROOT::DescriptorId_t fClusterId
Definition RCluster.hxx:152
On-disk pages within a page source are identified by the column and page number.
Definition RCluster.hxx:50
ROOT::DescriptorId_t fPhysicalColumnId
Definition RCluster.hxx:51
friend bool operator==(const Key &lhs, const Key &rhs)
Definition RCluster.hxx:54
Key(ROOT::DescriptorId_t columnId, std::uint64_t pageNo)
Definition RCluster.hxx:53
Key(ROOT::DescriptorId_t columnId, std::uint64_t pageNo)
Definition RCluster.hxx:53