Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleWriteOptions.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleWriteOptions.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-22
5
6/*************************************************************************
7 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
8 * All rights reserved. *
9 * *
10 * For the licensing terms see $ROOTSYS/LICENSE. *
11 * For the list of contributors see $ROOTSYS/README/CREDITS. *
12 *************************************************************************/
13
14#ifndef ROOT_RNTupleWriteOptions
15#define ROOT_RNTupleWriteOptions
16
17#include <Compression.h>
18
19#include <cstdint>
20#include <cstddef>
21#include <memory>
22
23namespace ROOT {
24
25class RNTupleWriteOptions;
26
27namespace Internal {
28
30public:
31 static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize);
32};
33
34} // namespace Internal
35
36// clang-format off
37/**
38\class ROOT::RNTupleWriteOptions
39\ingroup NTuple
40\brief Common user-tunable settings for storing RNTuples
41
42All page sink classes need to support the common options.
43
44<table>
45<tr>
46<th>Option name</th>
47<th>Type</th>
48<th>Default</th>
49<th>Description</th>
50</tr>
51
52<tr>
53<td>`Compression`</td>
54<td>`std::uint32_t`</td>
55<td>RCompressionSetting::EDefaults::kUseGeneralPurpose</td>
56<td>
57The compression settings for this RNTuple
58</td>
59</tr>
60
61<tr>
62<td>`ApproxZippedClusterSize`</td>
63<td>`std::size_t`</td>
64<td>128 MiB</td>
65<td>
66Approximation of the target compressed cluster size
67</td>
68</tr>
69
70<tr>
71<td>`MaxUnzippedClusterSize`</td>
72<td>`std::size_t`</td>
73<td>1280 MiB</td>
74<td>
75Memory limit for committing a cluster: with very high compression ratio, we need a limit
76on how large the I/O buffer can grow during writing.
77</td>
78</tr>
79
80<tr>
81<td>`InitialUnzippedPageSize`</td>
82<td>`std::size_t`</td>
83<td>256</td>
84<td>
85Initially, columns start with a page of this size. The default value is chosen to accommodate at least 32 elements
86of 64 bits, or 64 elements of 32 bits. If more elements are needed, pages are increased up until the byte limit
87given by the option `MaxUnzippedPageSize` or until the total page buffer limit is reached (as a sum of all page buffers).
88The total write buffer limit needs to be large enough to hold the initial pages of all columns.
89</td>
90</tr>
91
92<tr>
93<td>`MaxUnzippedPageSize`</td>
94<td>`std::size_t`</td>
95<td>1 MiB</td>
96<td>
97Pages can grow only to the given limit in bytes.
98</td>
99</tr>
100
101<tr>
102<td>`PageBufferBudget`</td>
103<td>`std::size_t`</td>
104<td>0 / auto</td>
105<td>
106The maximum size that the sum of all page buffers used for writing into a persistent sink is allowed to use.
107If set to zero, RNTuple will auto-adjust the budget based on the value of `ApproxZippedClusterSize`.
108If set manually, the size needs to be large enough to hold all initial page buffers.
109The total amount of memory for writing is larger, e.g. for the additional compressed buffers etc.
110Use RNTupleModel::EstimateWriteMemoryUsage() for the total estimated memory use for writing.
111The default values are tuned for a total write memory of around 400 MiB per fill context.
112</td>
113</tr>
114
115<tr>
116<td>`UseBufferedWrite`</td>
117<td>`bool`</td>
118<td>`true`</td>
119<td>
120Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them
121to keep pages of the same column adjacent, and coalesces the writes when committing a cluster.
122</td>
123</tr>
124
125<tr>
126<td>`UseDirectIO`</td>
127<td>`bool`</td>
128<td>`false`</td>
129<td>
130Whether to use Direct I/O for writing. Note that this introduces alignment requirements that may vary between
131filesystems and platforms.
132</td>
133</tr>
134
135<tr>
136<td>`WriteBufferSize`</td>
137<td>`std::size_t`</td>
138<td>4 MiB</td>
139<td>
140Buffer size to use for writing to files, must be a multiple of 4096 bytes. Testing suggests that 4 MiB gives the best
141performance (with Direct I/O) at a reasonable memory consumption.
142</td>
143</tr>
144
145<tr>
146<td>`UseImplicitMT`</td>
147<td>EImplicitMT</td>
148<td>EImplicitMT::kDefault</td>
149<td>
150Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on.
151The meaning of EImplicitMT::kDefault depends on the used writer: For the (sequential) RNTupleWriter, it translates
152to EImplicitMT::kOn and the user has to manually disable the use of implicit multi-threading if it is not wanted.
153For the RNTupleParallelWriter, the implementation defaults to EImplicitMT::kOff in order to avoid interference with
154explicit parallelism that might create one RNTupleFillContext per thread. If implicit multi-threading is wanted on
155top of this, the user has to explicitly request EImplicitMT::kOn.
156</td>
157</tr>
158
159<tr>
160<td>`EnablePageChecksums`</td>
161<td>`bool`</td>
162<td>`true`</td>
163<td>
164If set, checksums will be calculated and written for every page.
165If turned off, will also turn off `EnableSamePageMerging`.
166</td>
167</tr>
168
169<tr>
170<td>`EnableSamePageMerging`</td>
171<td>`bool`</td>
172<td>`true`</td>
173<td>
174If set, identical pages are deduplicated and aliased on disk.
175Requires `EnablePageChecksums` and will throw if previously disabled.
176</td>
177</tr>
178
179</table>
180*/
181// clang-format on
183public:
184 enum class EImplicitMT {
185 kOff,
186 kOn,
187 kDefault,
188 };
189
190 // clang-format off
191 static constexpr std::uint64_t kDefaultMaxKeySize = 0x4000'0000; // 1 GiB
192 // clang-format on
193
195
196protected:
198 std::size_t fApproxZippedClusterSize = 128 * 1024 * 1024;
200 std::size_t fInitialUnzippedPageSize = 256;
201 std::size_t fMaxUnzippedPageSize = 1024 * 1024;
202 std::size_t fPageBufferBudget = 0;
203 bool fUseBufferedWrite = true;
204 bool fUseDirectIO = false;
205 std::size_t fWriteBufferSize = 4 * 1024 * 1024;
209 /// Specifies the max size of a payload storeable into a single TKey. When writing an RNTuple to a ROOT file,
210 /// any payload whose size exceeds this will be split into multiple keys.
212
213public:
214
215 virtual ~RNTupleWriteOptions() = default;
216 virtual std::unique_ptr<RNTupleWriteOptions> Clone() const;
217
218 std::uint32_t GetCompression() const { return fCompression; }
219 void SetCompression(std::uint32_t val) { fCompression = val; }
224
226 void SetApproxZippedClusterSize(std::size_t val);
227
229 void SetMaxUnzippedClusterSize(std::size_t val);
230
232 void SetInitialUnzippedPageSize(std::size_t val);
233
234 std::size_t GetMaxUnzippedPageSize() const { return fMaxUnzippedPageSize; }
235 void SetMaxUnzippedPageSize(std::size_t val);
236
237 std::size_t GetPageBufferBudget() const;
238 void SetPageBufferBudget(std::size_t val) { fPageBufferBudget = val; }
239
240 bool GetUseBufferedWrite() const { return fUseBufferedWrite; }
241 void SetUseBufferedWrite(bool val) { fUseBufferedWrite = val; }
242
243 bool GetUseDirectIO() const { return fUseDirectIO; }
244 void SetUseDirectIO(bool val) { fUseDirectIO = val; }
245
246 std::size_t GetWriteBufferSize() const { return fWriteBufferSize; }
247 void SetWriteBufferSize(std::size_t val) { fWriteBufferSize = val; }
248
251
253 /// Note that turning off page checksums will also turn off the same page merging optimization (see tuning.md)
255 {
259 }
260 }
261
263 void SetEnableSamePageMerging(bool val);
264
265 std::uint64_t GetMaxKeySize() const { return fMaxKeySize; }
266};
267
268namespace Internal {
270{
271 options.fMaxKeySize = maxKeySize;
272}
273
274} // namespace Internal
275} // namespace ROOT
276
277#endif // ROOT_RNTupleWriteOptions
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize)
Common user-tunable settings for storing RNTuples.
void SetEnablePageChecksums(bool val)
Note that turning off page checksums will also turn off the same page merging optimization (see tunin...
std::size_t GetPageBufferBudget() const
void SetMaxUnzippedClusterSize(std::size_t val)
void SetWriteBufferSize(std::size_t val)
std::size_t GetWriteBufferSize() const
std::size_t GetApproxZippedClusterSize() const
std::size_t GetMaxUnzippedClusterSize() const
std::uint64_t fMaxKeySize
Specifies the max size of a payload storeable into a single TKey.
void SetUseImplicitMT(EImplicitMT val)
static constexpr std::uint64_t kDefaultMaxKeySize
std::uint64_t GetMaxKeySize() const
void SetMaxUnzippedPageSize(std::size_t val)
void SetInitialUnzippedPageSize(std::size_t val)
virtual std::unique_ptr< RNTupleWriteOptions > Clone() const
virtual ~RNTupleWriteOptions()=default
void SetPageBufferBudget(std::size_t val)
void SetApproxZippedClusterSize(std::size_t val)
std::size_t GetMaxUnzippedPageSize() const
std::uint32_t GetCompression() const
void SetCompression(std::uint32_t val)
void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EImplicitMT GetUseImplicitMT() const
std::size_t GetInitialUnzippedPageSize() const
Namespace for new ROOT classes and functions.
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EValues
Note: this is only temporarily a struct and will become a enum class hence the name convention used.
Definition Compression.h:88
@ kUseGeneralPurpose
Use the new recommended general-purpose setting; it is a best trade-off between compression ratio/dec...
Definition Compression.h:58