Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleWriteOptions.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleWriteOptions.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-22
5
6/*************************************************************************
7 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
8 * All rights reserved. *
9 * *
10 * For the licensing terms see $ROOTSYS/LICENSE. *
11 * For the list of contributors see $ROOTSYS/README/CREDITS. *
12 *************************************************************************/
13
14#ifndef ROOT_RNTupleWriteOptions
15#define ROOT_RNTupleWriteOptions
16
17#include <Compression.h>
18
19#include <cstdint>
20#include <cstddef>
21#include <memory>
22
23namespace ROOT {
24
25class RNTupleWriteOptions;
26
27namespace Internal {
28
30public:
31 static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize);
32};
33
34} // namespace Internal
35
36// clang-format off
37/**
38\class ROOT::RNTupleWriteOptions
39\ingroup NTuple
40\brief Common user-tunable settings for storing RNTuples
41
42All page sink classes need to support the common options.
43
44<table>
45<tr>
46<th>Option name</th>
47<th>Type</th>
48<th>Default</th>
49<th>Description</th>
50</tr>
51
52<tr>
53<td>`Compression`</td>
54<td>`std::uint32_t`</td>
55<td>RCompressionSetting::EDefaults::kUseGeneralPurpose</td>
56<td>
57The compression settings for this RNTuple
58</td>
59</tr>
60
61<tr>
62<td>`ApproxZippedClusterSize`</td>
63<td>`std::size_t`</td>
64<td>128 MiB</td>
65<td>
66Approximation of the target compressed cluster size
67</td>
68</tr>
69
70<tr>
71<td>`MaxUnzippedClusterSize`</td>
72<td>`std::size_t`</td>
73<td>1280 MiB</td>
74<td>
75Memory limit for committing a cluster: with a very high compression ratio, we need a limit
76on how large the I/O buffer can grow during writing.
77</td>
78</tr>
79
80<tr>
81<td>`InitialUnzippedPageSize`</td>
82<td>`std::size_t`</td>
83<td>256</td>
84<td>
85Initially, columns start with a page of this size. The default value is chosen to accommodate at least 32 elements
86of 64 bits, or 64 elements of 32 bits. If more elements are needed, pages are increased up until the byte limit
87given by the option `MaxUnzippedPageSize` or until the total page buffer limit is reached (as a sum of all page buffers).
88The total write buffer limit needs to be large enough to hold the initial pages of all columns.
89</td>
90</tr>
91
92<tr>
93<td>`MaxUnzippedPageSize`</td>
94<td>`std::size_t`</td>
95<td>1 MiB</td>
96<td>
97Pages can grow only to the given limit in bytes.
98</td>
99</tr>
100
101<tr>
102<td>`PageBufferBudget`</td>
103<td>`std::size_t`</td>
104<td>0 / auto</td>
105<td>
106The maximum size that the sum of all page buffers used for writing into a persistent sink is allowed to use.
107If set to zero, RNTuple will auto-adjust the budget based on the value of `ApproxZippedClusterSize`.
108If set manually, the size needs to be large enough to hold all initial page buffers.
109The total amount of memory for writing is larger, e.g. for the additional compressed buffers etc.
110Use RNTupleModel::EstimateWriteMemoryUsage() for the total estimated memory use for writing.
111The default values are tuned for a total write memory of around 400 MiB per fill context.
112</td>
113</tr>
114
115<tr>
116<td>`UseBufferedWrite`</td>
117<td>`bool`</td>
118<td>`true`</td>
119<td>
120Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them
121to keep pages of the same column adjacent, and coalesces the writes when committing a cluster.
122</td>
123</tr>
124
125<tr>
126<td>`UseDirectIO`</td>
127<td>`bool`</td>
128<td>`false`</td>
129<td>
130Whether to use Direct I/O for writing. Note that this introduces alignment requirements that may vary between
131filesystems and platforms.
132</td>
133</tr>
134
135<tr>
136<td>`WriteBufferSize`</td>
137<td>`std::size_t`</td>
138<td>4 MiB</td>
139<td>
140Buffer size to use for writing to files, must be a multiple of 4096 bytes. Testing suggests that 4 MiB gives best
141performance (with Direct I/O) at a reasonable memory consumption.
142</td>
143</tr>
144
145<tr>
146<td>`UseImplicitMT`</td>
147<td>EImplicitMT</td>
148<td>EImplicitMT::kDefault</td>
149<td>
150Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on.
151</td>
152</tr>
153
154<tr>
155<td>`EnablePageChecksums`</td>
156<td>`bool`</td>
157<td>`true`</td>
158<td>
159If set, checksums will be calculated and written for every page.
160If turned off, will also turn off `EnableSamePageMerging`.
161</td>
162</tr>
163
164<tr>
165<td>`EnableSamePageMerging`</td>
166<td>`bool`</td>
167<td>`true`</td>
168<td>
169If set, identical pages are deduplicated and aliased on disk.
170Requires `EnablePageChecksums` and will throw if previously disabled.
171</td>
172</tr>
173
174</table>
175*/
176// clang-format on
178public:
179 enum class EImplicitMT {
180 kOff,
181 kDefault,
182 };
183
184 // clang-format off
185 static constexpr std::uint64_t kDefaultMaxKeySize = 0x4000'0000; // 1 GiB
186 // clang-format on
187
189
190protected:
192 std::size_t fApproxZippedClusterSize = 128 * 1024 * 1024;
194 std::size_t fInitialUnzippedPageSize = 256;
195 std::size_t fMaxUnzippedPageSize = 1024 * 1024;
196 std::size_t fPageBufferBudget = 0;
197 bool fUseBufferedWrite = true;
198 bool fUseDirectIO = false;
199 std::size_t fWriteBufferSize = 4 * 1024 * 1024;
203 /// Specifies the max size of a payload storeable into a single TKey. When writing an RNTuple to a ROOT file,
204 /// any payload whose size exceeds this will be split into multiple keys.
206
207public:
208
209 virtual ~RNTupleWriteOptions() = default;
210 virtual std::unique_ptr<RNTupleWriteOptions> Clone() const;
211
212 std::uint32_t GetCompression() const { return fCompression; }
213 void SetCompression(std::uint32_t val) { fCompression = val; }
218
220 void SetApproxZippedClusterSize(std::size_t val);
221
223 void SetMaxUnzippedClusterSize(std::size_t val);
224
226 void SetInitialUnzippedPageSize(std::size_t val);
227
228 std::size_t GetMaxUnzippedPageSize() const { return fMaxUnzippedPageSize; }
229 void SetMaxUnzippedPageSize(std::size_t val);
230
231 std::size_t GetPageBufferBudget() const;
232 void SetPageBufferBudget(std::size_t val) { fPageBufferBudget = val; }
233
234 bool GetUseBufferedWrite() const { return fUseBufferedWrite; }
235 void SetUseBufferedWrite(bool val) { fUseBufferedWrite = val; }
236
237 bool GetUseDirectIO() const { return fUseDirectIO; }
238 void SetUseDirectIO(bool val) { fUseDirectIO = val; }
239
240 std::size_t GetWriteBufferSize() const { return fWriteBufferSize; }
241 void SetWriteBufferSize(std::size_t val) { fWriteBufferSize = val; }
242
245
247 /// Note that turning off page checksums will also turn off the same page merging optimization (see tuning.md)
249 {
253 }
254 }
255
257 void SetEnableSamePageMerging(bool val);
258
259 std::uint64_t GetMaxKeySize() const { return fMaxKeySize; }
260};
261
262namespace Internal {
264{
265 options.fMaxKeySize = maxKeySize;
266}
267
268} // namespace Internal
269} // namespace ROOT
270
271#endif // ROOT_RNTupleWriteOptions
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize)
Common user-tunable settings for storing RNTuples.
void SetEnablePageChecksums(bool val)
Note that turning off page checksums will also turn off the same page merging optimization (see tunin...
std::size_t GetPageBufferBudget() const
void SetMaxUnzippedClusterSize(std::size_t val)
void SetWriteBufferSize(std::size_t val)
std::size_t GetWriteBufferSize() const
std::size_t GetApproxZippedClusterSize() const
std::size_t GetMaxUnzippedClusterSize() const
std::uint64_t fMaxKeySize
Specifies the max size of a payload storeable into a single TKey.
void SetUseImplicitMT(EImplicitMT val)
static constexpr std::uint64_t kDefaultMaxKeySize
std::uint64_t GetMaxKeySize() const
void SetMaxUnzippedPageSize(std::size_t val)
void SetInitialUnzippedPageSize(std::size_t val)
virtual std::unique_ptr< RNTupleWriteOptions > Clone() const
virtual ~RNTupleWriteOptions()=default
void SetPageBufferBudget(std::size_t val)
void SetApproxZippedClusterSize(std::size_t val)
std::size_t GetMaxUnzippedPageSize() const
std::uint32_t GetCompression() const
void SetCompression(std::uint32_t val)
void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EImplicitMT GetUseImplicitMT() const
std::size_t GetInitialUnzippedPageSize() const
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EValues
Note: this is only temporarily a struct and will become a enum class hence the name convention used.
Definition Compression.h:88
@ kUseGeneralPurpose
Use the new recommended general-purpose setting; it is a best trade-off between compression ratio/dec...
Definition Compression.h:58