Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RNTupleWriteOptions.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleWriteOptions.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-22
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleWriteOptions
17#define ROOT7_RNTupleWriteOptions
18
19#include <Compression.h>
20
21#include <cstdint>
22#include <cstddef>
23#include <memory>
24
25namespace ROOT {
26
27class RNTupleWriteOptions;
28
29namespace Internal {
30
32public:
33 static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize);
34};
35
36} // namespace Internal
37
38// clang-format off
39/**
40\class ROOT::RNTupleWriteOptions
41\ingroup NTuple
42\brief Common user-tunable settings for storing RNTuples
43
44All page sink classes need to support the common options.
45
46<table>
47<tr>
48<th>Option name</th>
49<th>Type</th>
50<th>Default</th>
51<th>Description</th>
52</tr>
53
54<tr>
55<td>`Compression`</td>
56<td>`std::uint32_t`</td>
57<td>RCompressionSetting::EDefaults::kUseGeneralPurpose</td>
58<td>
59The compression settings for this RNTuple
60</td>
61</tr>
62
63<tr>
64<td>`ApproxZippedClusterSize`</td>
65<td>`std::size_t`</td>
66<td>128 MiB</td>
67<td>
68Approximation of the target compressed cluster size
69</td>
70</tr>
71
72<tr>
73<td>`MaxUnzippedClusterSize`</td>
74<td>`std::size_t`</td>
75<td>1280 MiB</td>
76<td>
77Memory limit for committing a cluster: with very high compression ratio, we need a limit
78on how large the I/O buffer can grow during writing.
79</td>
80</tr>
81
82<tr>
83<td>`InitialUnzippedPageSize`</td>
84<td>`std::size_t`</td>
85<td>256</td>
86<td>
87Initially, columns start with a page of this size. The default value is chosen to accommodate at least 32 elements
88of 64 bits, or 64 elements of 32 bits. If more elements are needed, pages are increased up until the byte limit
89given by the option `MaxUnzippedPageSize` or until the total page buffer limit is reached (as a sum of all page buffers).
90The total write buffer limit needs to be large enough to hold the initial pages of all columns.
91</td>
92</tr>
93
94<tr>
95<td>`MaxUnzippedPageSize`</td>
96<td>`std::size_t`</td>
97<td>1 MiB</td>
98<td>
99Pages can grow only to the given limit in bytes.
100</td>
101</tr>
102
103<tr>
104<td>`PageBufferBudget`</td>
105<td>`std::size_t`</td>
106<td>0 / auto</td>
107<td>
108The maximum size that the sum of all page buffers used for writing into a persistent sink is allowed to use.
109If set to zero, RNTuple will auto-adjust the budget based on the value of `ApproxZippedClusterSize`.
110If set manually, the size needs to be large enough to hold all initial page buffers.
111The total amount of memory for writing is larger, e.g. for the additional compressed buffers etc.
112Use RNTupleModel::EstimateWriteMemoryUsage() for the total estimated memory use for writing.
113The default values are tuned for a total write memory of around 400 MiB per fill context.
114</td>
115</tr>
116
117<tr>
118<td>`UseBufferedWrite`</td>
119<td>`bool`</td>
120<td>`true`</td>
121<td>
122Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them
123to keep pages of the same column adjacent, and coalesces the writes when committing a cluster.
124</td>
125</tr>
126
127<tr>
128<td>`UseDirectIO`</td>
129<td>`bool`</td>
130<td>`false`</td>
131<td>
132Whether to use Direct I/O for writing. Note that this introduces alignment requirements that may vary between
133filesystems and platforms.
134</td>
135</tr>
136
137<tr>
138<td>`WriteBufferSize`</td>
139<td>`std::size_t`</td>
140<td>4 MiB</td>
141<td>
142Buffer size to use for writing to files, must be a multiple of 4096 bytes. Testing suggests that 4MiB gives best
143performance (with Direct I/O) at a reasonable memory consumption.
144</td>
145</tr>
146
147<tr>
148<td>`UseImplicitMT`</td>
149<td>EImplicitMT</td>
150<td>EImplicitMT::kDefault</td>
151<td>
152Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on.
153</td>
154</tr>
155
156<tr>
157<td>`EnablePageChecksums`</td>
158<td>`bool`</td>
159<td>`true`</td>
160<td>
161If set, checksums will be calculated and written for every page.
162If turned off, will also turn off `EnableSamePageMerging`.
163</td>
164</tr>
165
166<tr>
167<td>`EnableSamePageMerging`</td>
168<td>`bool`</td>
169<td>`true`</td>
170<td>
171If set, identical pages are deduplicated and aliased on disk.
172Requires `EnablePageChecksums` and will throw if previously disabled.
173</td>
174</tr>
175
176</table>
177*/
178// clang-format on
180public:
181 enum class EImplicitMT { // values accepted by the `UseImplicitMT` option (implicit multi-threading for compressing pages)
182 kOff, // presumably disables implicit multi-threading -- TODO confirm against the implementation
183 kDefault, // the documented default value of `UseImplicitMT`
184 };
185
186 // clang-format off
187 static constexpr std::uint64_t kDefaultMaxKeySize = 0x4000'0000; // 1 GiB
188 // clang-format on
189
191
192protected:
194 std::size_t fApproxZippedClusterSize = 128 * 1024 * 1024;
196 std::size_t fInitialUnzippedPageSize = 256;
197 std::size_t fMaxUnzippedPageSize = 1024 * 1024;
198 std::size_t fPageBufferBudget = 0;
199 bool fUseBufferedWrite = true;
200 bool fUseDirectIO = false;
201 std::size_t fWriteBufferSize = 4 * 1024 * 1024;
205 /// Specifies the max size of a payload storable into a single TKey. When writing an RNTuple to a ROOT file,
206 /// any payload whose size exceeds this will be split into multiple keys.
208
209public:
210
211 virtual ~RNTupleWriteOptions() = default;
212 virtual std::unique_ptr<RNTupleWriteOptions> Clone() const;
213
214 std::uint32_t GetCompression() const { return fCompression; }
215 void SetCompression(std::uint32_t val) { fCompression = val; }
220
222 void SetApproxZippedClusterSize(std::size_t val);
223
225 void SetMaxUnzippedClusterSize(std::size_t val);
226
228 void SetInitialUnzippedPageSize(std::size_t val);
229
230 std::size_t GetMaxUnzippedPageSize() const { return fMaxUnzippedPageSize; }
231 void SetMaxUnzippedPageSize(std::size_t val);
232
233 std::size_t GetPageBufferBudget() const;
234 void SetPageBufferBudget(std::size_t val) { fPageBufferBudget = val; }
235
236 bool GetUseBufferedWrite() const { return fUseBufferedWrite; }
237 void SetUseBufferedWrite(bool val) { fUseBufferedWrite = val; }
238
239 bool GetUseDirectIO() const { return fUseDirectIO; }
240 void SetUseDirectIO(bool val) { fUseDirectIO = val; }
241
242 std::size_t GetWriteBufferSize() const { return fWriteBufferSize; }
243 void SetWriteBufferSize(std::size_t val) { fWriteBufferSize = val; }
244
247
249 /// Note that turning off page checksums will also turn off the same page merging optimization (see tuning.md)
251 {
255 }
256 }
257
259 void SetEnableSamePageMerging(bool val);
260
261 std::uint64_t GetMaxKeySize() const { return fMaxKeySize; }
262};
263
264namespace Internal {
266{
267 options.fMaxKeySize = maxKeySize;
268}
269
270} // namespace Internal
271
272namespace Experimental {
273// TODO(gparolini): remove before branching ROOT v6.36
274using RNTupleWriteOptions [[deprecated("ROOT::Experimental::RNTupleWriteOptions moved to ROOT::RNTupleWriteOptions")]] =
276} // namespace Experimental
277
278} // namespace ROOT
279
280#endif // ROOT7_RNTupleWriteOptions
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize)
Common user-tunable settings for storing RNTuples.
void SetEnablePageChecksums(bool val)
Note that turning off page checksums will also turn off the same page merging optimization (see tunin...
std::size_t GetPageBufferBudget() const
void SetMaxUnzippedClusterSize(std::size_t val)
void SetWriteBufferSize(std::size_t val)
std::size_t GetWriteBufferSize() const
std::size_t GetApproxZippedClusterSize() const
std::size_t GetMaxUnzippedClusterSize() const
std::uint64_t fMaxKeySize
Specifies the max size of a payload storable into a single TKey.
void SetUseImplicitMT(EImplicitMT val)
static constexpr std::uint64_t kDefaultMaxKeySize
std::uint64_t GetMaxKeySize() const
void SetMaxUnzippedPageSize(std::size_t val)
void SetInitialUnzippedPageSize(std::size_t val)
virtual std::unique_ptr< RNTupleWriteOptions > Clone() const
virtual ~RNTupleWriteOptions()=default
void SetPageBufferBudget(std::size_t val)
void SetApproxZippedClusterSize(std::size_t val)
std::size_t GetMaxUnzippedPageSize() const
std::uint32_t GetCompression() const
void SetCompression(std::uint32_t val)
void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EImplicitMT GetUseImplicitMT() const
std::size_t GetInitialUnzippedPageSize() const
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EValues
Note: this is only temporarily a struct and will become an enum class hence the name convention used.
Definition Compression.h:88
@ kUseGeneralPurpose
Use the new recommended general-purpose setting; it is a best trade-off between compression ratio/dec...
Definition Compression.h:58