Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RPagePool.hxx
Go to the documentation of this file.
1/// \file ROOT/RPagePool.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RPagePool
17#define ROOT7_RPagePool
18
19#include <ROOT/RPage.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22
23#include <cstddef>
24#include <map>
25#include <mutex>
26#include <typeindex>
27#include <typeinfo>
28#include <unordered_map>
29#include <unordered_set>
30#include <vector>
31
32namespace ROOT {
33namespace Experimental {
34
35namespace Internal {
36
37// clang-format off
38/**
39\class ROOT::Experimental::Internal::RPagePool
40\ingroup NTuple
41\brief A thread-safe cache of pages loaded from the page source.
42
43The page pool is used as a cache for pages loaded from a page source.
44In this way, identical page needed at the same time, only need to be loaded once.
45Page sources also use the page pool to stage (preload) pages unsealed by IMT tasks.
46*/
47// clang-format on
48class RPagePool {
49 friend class RPageRef;
50
51public:
52 // Search key for a set of pages covering the same column and in-memory target type.
53 // Within the set of pages, one needs to find the page of a given index.
54 struct RKey {
56 std::type_index fInMemoryType = std::type_index(typeid(void));
57
58 bool operator==(const RKey &other) const
59 {
60 return this->fColumnId == other.fColumnId && this->fInMemoryType == other.fInMemoryType;
61 }
62
63 bool operator!=(const RKey &other) const { return !(*this == other); }
64 };
65
66private:
67 /// Hash function to be used in the unordered map fLookupByKey
68 struct RKeyHasher {
69 /// Like boost::hash_combine
70 std::size_t operator()(const RKey &k) const
71 {
72 auto seed = std::hash<DescriptorId_t>()(k.fColumnId);
73 return seed ^ (std::hash<std::type_index>()(k.fInMemoryType) + 0x9e3779b9 + (seed << 6) + (seed >> 2));
74 }
75 };
76
77 /// Every page in the page pool is annotated with a search key and a reference counter.
78 struct REntry {
81 std::int64_t fRefCounter = 0;
82 };
83
84 /// Used in fLookupByKey to store both the absolute and the cluster-local page index of the referenced page.
85 /// This allows to do binary search for one or the other. Note that elements in fLookupByKey must have
86 /// _both_ values to be valid. If RPagePosition is used as a search key, only one of the two needs to be set.
90
91 bool operator<(const RPagePosition &other) const
92 {
95
103 }
104
105 // Constructor used to store a page in fLookupByKey
106 explicit RPagePosition(const RPage &page)
107 : fGlobalFirstElement(page.GetGlobalRangeFirst()),
109 {
110 }
111
112 // Search key constructors
113 explicit RPagePosition(NTupleSize_t globalIndex) : fGlobalFirstElement(globalIndex) {}
114 explicit RPagePosition(RClusterIndex clusterIndex) : fClusterFirstElement(clusterIndex) {}
115 };
116
117 std::vector<REntry> fEntries; ///< All cached pages in the page pool
118 /// Used in ReleasePage() to find the page index in fPages
119 std::unordered_map<void *, std::size_t> fLookupByBuffer;
120 /// Used in GetPage() to find the right page in fEntries. Lookup for the key (pair of on-disk and in-memory type)
121 /// takes place in O(1). The selected pages are identified by index into the fEntries vector (map's value)
122 /// and sorted by the position of the page in the column (map's key). Thus, access to pages of the page set
123 /// has logarithmic complexity.
124 std::unordered_map<RKey, std::map<RPagePosition, std::size_t>, RKeyHasher> fLookupByKey;
125 /// Remembers pages with reference counter 0, organized by the page's cluster id. The pages are identified
126 /// by their page buffer address. The fLookupByBuffer map can be used to resolve the address to a page.
127 /// Once a page gets used, it is removed from the unused pages list. Evict will remove all unused pages
128 /// from a given cluster id.
129 std::unordered_map<DescriptorId_t, std::unordered_set<void *>> fUnusedPages;
130 std::mutex fLock; ///< The page pool is accessed concurrently due to parallel decompression
131
132 /// Add a new page to the fLookupByBuffer and fLookupByKey data structures.
133 REntry &AddPage(RPage page, const RKey &key, std::int64_t initialRefCounter);
134
135 /// Give back a page to the pool and decrease the reference counter. There must not be any pointers anymore into
136 /// this page. If the reference counter drops to zero, the page pool might decide to call the deleter given in
137 /// during registration. Called by the RPageRef destructor.
138 void ReleasePage(const RPage &page);
139
140 /// Called by GetPage(), when the reference counter increases from zero to one
141 void RemoveFromUnusedPages(const RPage &page);
142
143 /// Called both by ReleasePage() and by Evict() to remove an unused page from the pool
144 void ErasePage(std::size_t entryIdx, decltype(fLookupByBuffer)::iterator lookupByBufferItr);
145
146public:
147 RPagePool() = default;
148 RPagePool(const RPagePool&) = delete;
149 RPagePool& operator =(const RPagePool&) = delete;
150 ~RPagePool() = default;
151
152 /// Adds a new page to the pool. Upon registration, the page pool takes ownership of the page's memory.
153 /// The new page has its reference counter set to 1.
154 RPageRef RegisterPage(RPage page, RKey key);
155 /// Like RegisterPage() but the reference counter is initialized to 0. In addition, the page is added
156 /// to the set of unused pages of the page's cluster (see Evict()).
157 void PreloadPage(RPage page, RKey key);
158 /// Removes unused pages (pages with reference counter 0) from the page pool. Users of PreloadPage() should
159 /// use Evict() appropriately to avoid accumulation of unused pages.
160 void Evict(DescriptorId_t clusterId);
161 /// Tries to find the page corresponding to column and index in the cache. If the page is found, its reference
162 /// counter is increased
163 RPageRef GetPage(RKey key, NTupleSize_t globalIndex);
164 RPageRef GetPage(RKey key, RClusterIndex clusterIndex);
165};
166
167// clang-format off
168/**
169\class ROOT::Experimental::Internal::RPageRef
170\ingroup NTuple
171\brief Reference to a page stored in the page pool
172
173The referenced page knows about its page pool and decreases the reference counter on destruction.
174*/
175// clang-format on
176class RPageRef {
177 friend class RPagePool;
178
181
182 // Called as delegated constructor and directly by the page pool
183 RPageRef(const RPage &page, RPagePool *pagePool) : fPagePool(pagePool)
184 {
185 // We leave the fPage::fPageAllocator member unset (nullptr), since fPage is a non-owning view on the page
186 fPage.fBuffer = page.fBuffer;
192 }
193
194public:
195 RPageRef() = default;
196 RPageRef(const RPageRef &other) = delete;
197 RPageRef &operator=(const RPageRef &other) = delete;
198
199 RPageRef(RPageRef &&other) : RPageRef(other.fPage, other.fPagePool) { other.fPagePool = nullptr; }
200
202 {
203 if (this != &other) {
204 std::swap(fPage, other.fPage);
205 std::swap(fPagePool, other.fPagePool);
206 }
207 return *this;
208 }
209
211 {
212 if (fPagePool)
214 }
215
216 /// Used by the friend virtual page source to map the cluster ID to its virtual counterpart
218 {
220 }
221
222 const RPage &Get() const { return fPage; }
223};
224
225} // namespace Internal
226
227} // namespace Experimental
228} // namespace ROOT
229
230#endif
A thread-safe cache of pages loaded from the page source.
Definition RPagePool.hxx:48
REntry & AddPage(RPage page, const RKey &key, std::int64_t initialRefCounter)
Add a new page to the fLookupByBuffer and fLookupByKey data structures.
Definition RPagePool.cxx:26
void PreloadPage(RPage page, RKey key)
Like RegisterPage() but the reference counter is initialized to 0.
Definition RPagePool.cxx:57
RPagePool & operator=(const RPagePool &)=delete
std::unordered_map< void *, std::size_t > fLookupByBuffer
Used in ReleasePage() to find the page index in fPages.
std::vector< REntry > fEntries
All cached pages in the page pool.
RPagePool(const RPagePool &)=delete
void ReleasePage(const RPage &page)
Give back a page to the pool and decrease the reference counter.
Definition RPagePool.cxx:91
void Evict(DescriptorId_t clusterId)
Removes unused pages (pages with reference counter 0) from the page pool.
void ErasePage(std::size_t entryIdx, decltype(fLookupByBuffer)::iterator lookupByBufferItr)
Called both by ReleasePage() and by Evict() to remove an unused page from the pool.
Definition RPagePool.cxx:65
std::unordered_map< RKey, std::map< RPagePosition, std::size_t >, RKeyHasher > fLookupByKey
Used in GetPage() to find the right page in fEntries.
std::unordered_map< DescriptorId_t, std::unordered_set< void * > > fUnusedPages
Remembers pages with reference counter 0, organized by the page's cluster id.
void RemoveFromUnusedPages(const RPage &page)
Called by GetPage(), when the reference counter increases from zero to one.
std::mutex fLock
The page pool is accessed concurrently due to parallel decompression.
RPageRef GetPage(RKey key, NTupleSize_t globalIndex)
Tries to find the page corresponding to column and index in the cache.
RPageRef RegisterPage(RPage page, RKey key)
Adds a new page to the pool.
Definition RPagePool.cxx:51
Reference to a page stored in the page pool.
RPageRef(const RPage &page, RPagePool *pagePool)
RPageRef & operator=(const RPageRef &other)=delete
RPageRef & operator=(RPageRef &&other)
void ChangeClusterId(DescriptorId_t clusterId)
Used by the friend virtual page source to map the cluster ID to its virtual counterpart.
RPageRef(const RPageRef &other)=delete
Stores information about the cluster in which this page resides.
Definition RPage.hxx:56
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:47
ClusterSize_t::ValueType GetClusterRangeFirst() const
Definition RPage.hxx:128
const RClusterInfo & GetClusterInfo() const
Definition RPage.hxx:132
std::uint32_t fMaxElements
The capacity of the page in number of elements.
Definition RPage.hxx:76
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distriniguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr ClusterSize_t kInvalidClusterIndex(std::uint64_t(-1))
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Every page in the page pool is annotated with a search key and a reference counter.
Definition RPagePool.hxx:78
Hash function to be used in the unordered map fLookupByKey.
Definition RPagePool.hxx:68
std::size_t operator()(const RKey &k) const
Like boost::hash_combine.
Definition RPagePool.hxx:70
bool operator==(const RKey &other) const
Definition RPagePool.hxx:58
bool operator!=(const RKey &other) const
Definition RPagePool.hxx:63
Used in fLookupByKey to store both the absolute and the cluster-local page index of the referenced pa...
Definition RPagePool.hxx:87
bool operator<(const RPagePosition &other) const
Definition RPagePool.hxx:91