Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RNTupleWriteOptions.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleWriteOptions.hxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \date 2024-02-22
4
5/*************************************************************************
6 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12
13#ifndef ROOT_RNTupleWriteOptions
14#define ROOT_RNTupleWriteOptions
15
16#include <Compression.h>
17
18#include <cstdint>
19#include <cstddef>
20#include <memory>
21
22namespace ROOT {
23
25
26namespace Internal {
27
29public:
30 static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize);
31};
32
33} // namespace Internal
34
35// clang-format off
36/**
37\class ROOT::RNTupleWriteOptions
38\ingroup NTuple
39\brief Common user-tunable settings for storing RNTuples
40
41All page sink classes need to support the common options.
42
43<table>
44<tr>
45<th>Option name</th>
46<th>Type</th>
47<th>Default</th>
48<th>Description</th>
49</tr>
50
51<tr>
52<td>`Compression`</td>
53<td>`std::uint32_t`</td>
54<td>RCompressionSetting::EDefaults::kUseGeneralPurpose</td>
55<td>
56The compression settings for this RNTuple
57</td>
58</tr>
59
60<tr>
61<td>`ApproxZippedClusterSize`</td>
62<td>`std::size_t`</td>
63<td>128 MiB</td>
64<td>
65Approximation of the target compressed cluster size
66</td>
67</tr>
68
69<tr>
70<td>`MaxUnzippedClusterSize`</td>
71<td>`std::size_t`</td>
72<td>1280 MiB</td>
73<td>
74Memory limit for committing a cluster: with a very high compression ratio, we need a limit
75on how large the I/O buffer can grow during writing.
76</td>
77</tr>
78
79<tr>
80<td>`InitialUnzippedPageSize`</td>
81<td>`std::size_t`</td>
82<td>256</td>
83<td>
84Initially, columns start with a page of this size. The default value is chosen to accommodate at least 32 elements
85of 64 bits, or 64 elements of 32 bits. If more elements are needed, pages are increased up until the byte limit
86given by the option `MaxUnzippedPageSize` or until the total page buffer limit is reached (as a sum of all page buffers).
87The total write buffer limit needs to be large enough to hold the initial pages of all columns.
88</td>
89</tr>
90
91<tr>
92<td>`MaxUnzippedPageSize`</td>
93<td>`std::size_t`</td>
94<td>1 MiB</td>
95<td>
96Pages can grow only to the given limit in bytes.
97</td>
98</tr>
99
100<tr>
101<td>`PageBufferBudget`</td>
102<td>`std::size_t`</td>
103<td>0 / auto</td>
104<td>
105The maximum total size that all page buffers used for writing into a persistent sink are allowed to use.
106If set to zero, RNTuple will auto-adjust the budget based on the value of `ApproxZippedClusterSize`.
107If set manually, the size needs to be large enough to hold all initial page buffers.
108The total amount of memory for writing is larger, e.g. for the additional compressed buffers etc.
109Use RNTupleModel::EstimateWriteMemoryUsage() for the total estimated memory use for writing.
110The default values are tuned for a total write memory of around 400 MiB per fill context.
111</td>
112</tr>
113
114<tr>
115<td>`UseBufferedWrite`</td>
116<td>`bool`</td>
117<td>`true`</td>
118<td>
119Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them
120to keep pages of the same column adjacent, and coalesces the writes when committing a cluster.
121</td>
122</tr>
123
124<tr>
125<td>`UseDirectIO`</td>
126<td>`bool`</td>
127<td>`false`</td>
128<td>
129Whether to use Direct I/O for writing. Note that this introduces alignment requirements that may vary between
130filesystems and platforms.
131</td>
132</tr>
133
134<tr>
135<td>`WriteBufferSize`</td>
136<td>`std::size_t`</td>
137<td>4 MiB</td>
138<td>
139Buffer size to use for writing to files, must be a multiple of 4096 bytes. Testing suggests that 4 MiB gives the best
140performance (with Direct I/O) at a reasonable memory consumption.
141</td>
142</tr>
143
144<tr>
145<td>`UseImplicitMT`</td>
146<td>EImplicitMT</td>
147<td>EImplicitMT::kDefault</td>
148<td>
149Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on.
150The meaning of EImplicitMT::kDefault depends on the used writer: For the (sequential) RNTupleWriter, it translates
151to EImplicitMT::kOn and the user has to manually disable the use of implicit multi-threading if it is not wanted.
152For the RNTupleParallelWriter, the implementation defaults to EImplicitMT::kOff in order to avoid interference with
153explicit parallelism that might create one RNTupleFillContext per thread. If implicit multi-threading is wanted on
154top of this, the user has to explicitly request EImplicitMT::kOn.
155</td>
156</tr>
157
158<tr>
159<td>`EnablePageChecksums`</td>
160<td>`bool`</td>
161<td>`true`</td>
162<td>
163If set, checksums will be calculated and written for every page.
164If turned off, will also turn off `EnableSamePageMerging`.
165</td>
166</tr>
167
168<tr>
169<td>`EnableSamePageMerging`</td>
170<td>`bool`</td>
171<td>`true`</td>
172<td>
173If set, identical pages are deduplicated and aliased on disk.
174Requires `EnablePageChecksums` and will throw if previously disabled.
175</td>
176</tr>
177
178</table>
179*/
180// clang-format on
182public:
183 enum class EImplicitMT {
187 };
188
189 // clang-format off
190 static constexpr std::uint64_t kDefaultMaxKeySize = 0x4000'0000; // 1 GiB
191 // clang-format on
192
194
195protected:
197 std::size_t fApproxZippedClusterSize = 128 * 1024 * 1024;
199 std::size_t fInitialUnzippedPageSize = 256;
200 std::size_t fMaxUnzippedPageSize = 1024 * 1024;
201 std::size_t fPageBufferBudget = 0;
202 bool fUseBufferedWrite = true;
203 bool fUseDirectIO = false;
204 std::size_t fWriteBufferSize = 4 * 1024 * 1024;
208 /// Specifies the max size of a payload storeable into a single TKey. When writing an RNTuple to a ROOT file,
209 /// any payload whose size exceeds this will be split into multiple keys.
211
212public:
213
214 virtual ~RNTupleWriteOptions() = default;
215 virtual std::unique_ptr<RNTupleWriteOptions> Clone() const;
216
217 std::uint32_t GetCompression() const { return fCompression; }
218 void SetCompression(std::uint32_t val) { fCompression = val; }
219 void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
220 {
221 fCompression = CompressionSettings(algorithm, compressionLevel);
222 }
223
225 void SetApproxZippedClusterSize(std::size_t val);
226
228 void SetMaxUnzippedClusterSize(std::size_t val);
229
231 void SetInitialUnzippedPageSize(std::size_t val);
232
233 std::size_t GetMaxUnzippedPageSize() const { return fMaxUnzippedPageSize; }
234 void SetMaxUnzippedPageSize(std::size_t val);
235
236 std::size_t GetPageBufferBudget() const;
237 void SetPageBufferBudget(std::size_t val) { fPageBufferBudget = val; }
238
239 bool GetUseBufferedWrite() const { return fUseBufferedWrite; }
240 void SetUseBufferedWrite(bool val) { fUseBufferedWrite = val; }
241
242 bool GetUseDirectIO() const { return fUseDirectIO; }
243 void SetUseDirectIO(bool val) { fUseDirectIO = val; }
244
245 std::size_t GetWriteBufferSize() const { return fWriteBufferSize; }
246 void SetWriteBufferSize(std::size_t val) { fWriteBufferSize = val; }
247
250
252 /// Note that turning off page checksums will also turn off the same page merging optimization (see tuning.md)
254 {
258 }
259 }
260
262 void SetEnableSamePageMerging(bool val);
263
264 std::uint64_t GetMaxKeySize() const { return fMaxKeySize; }
265
277
278 friend bool operator!=(const RNTupleWriteOptions &lhs, const RNTupleWriteOptions &rhs) { return !(lhs == rhs); }
279};
280
281namespace Internal {
/// Overwrites the `fMaxKeySize` member of `options` with `maxKeySize`.
/// The value is stored as given; no validation is performed in this function.
/// \param options The write options object whose `fMaxKeySize` is modified.
/// \param maxKeySize The new value assigned to `fMaxKeySize`.
282inline void RNTupleWriteOptionsManip::SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize)
283{
284 options.fMaxKeySize = maxKeySize;
285}
286
287} // namespace Internal
288} // namespace ROOT
289
290#endif // ROOT_RNTupleWriteOptions
@ kDefault
Definition TSystem.h:243
static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize)
Common user-tunable settings for storing RNTuples.
void SetEnablePageChecksums(bool val)
Note that turning off page checksums will also turn off the same page merging optimization (see tunin...
friend bool operator==(const RNTupleWriteOptions &lhs, const RNTupleWriteOptions &rhs)
std::size_t GetPageBufferBudget() const
void SetMaxUnzippedClusterSize(std::size_t val)
void SetWriteBufferSize(std::size_t val)
std::size_t GetWriteBufferSize() const
std::size_t GetApproxZippedClusterSize() const
std::size_t GetMaxUnzippedClusterSize() const
std::uint64_t fMaxKeySize
Specifies the max size of a payload storeable into a single TKey.
void SetUseImplicitMT(EImplicitMT val)
static constexpr std::uint64_t kDefaultMaxKeySize
std::uint64_t GetMaxKeySize() const
friend bool operator!=(const RNTupleWriteOptions &lhs, const RNTupleWriteOptions &rhs)
void SetMaxUnzippedPageSize(std::size_t val)
void SetInitialUnzippedPageSize(std::size_t val)
virtual std::unique_ptr< RNTupleWriteOptions > Clone() const
virtual ~RNTupleWriteOptions()=default
void SetPageBufferBudget(std::size_t val)
void SetApproxZippedClusterSize(std::size_t val)
std::size_t GetMaxUnzippedPageSize() const
std::uint32_t GetCompression() const
void SetCompression(std::uint32_t val)
void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EImplicitMT GetUseImplicitMT() const
std::size_t GetInitialUnzippedPageSize() const
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EValues
Note: this is only temporarily a struct and will become an enum class, hence the naming convention used.
Definition Compression.h:88
@ kUseGeneralPurpose
Use the new recommended general-purpose setting; it is a best trade-off between compression ratio/dec...
Definition Compression.h:58