Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RNTupleWriteOptions.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleWriteOptions.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-22
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleWriteOptions
17#define ROOT7_RNTupleWriteOptions
18
19#include <Compression.h>
20
21#include <cstdint>
22#include <cstddef>
23#include <memory>
24
25namespace ROOT {
26namespace Experimental {
27
28class RNTupleWriteOptions;
29
30namespace Internal {
31
33public:
34 static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize);
35};
36
37} // namespace Internal
38
39// clang-format off
40/**
41\class ROOT::Experimental::RNTupleWriteOptions
42\ingroup NTuple
43\brief Common user-tunable settings for storing ntuples
44
45All page sink classes need to support the common options.
46*/
47// clang-format on
49public:
50 enum class EImplicitMT {
51 kOff,
53 };
54
55 // clang-format off
56 static constexpr std::uint64_t kDefaultMaxKeySize = 0x4000'0000; // 1 GiB
57
59 // clang-format on
60
61protected:
63 /// Approximation of the target compressed cluster size
64 std::size_t fApproxZippedClusterSize = 128 * 1024 * 1024;
65 /// Memory limit for committing a cluster: with very high compression ratio, we need a limit
66 /// on how large the I/O buffer can grow during writing.
68 /// Initially, columns start with a page of this size. The default value is chosen to accommodate at least 32 elements
69 /// of 64 bits, or 64 elements of 32 bits. If more elements are needed, pages are increased up until the byte limit
70 /// given by fMaxUnzippedPageSize or until the total page buffer limit is reached (as a sum of all page buffers).
71 /// The total write buffer limit needs to be large enough to hold the initial pages of all columns.
72 std::size_t fInitialUnzippedPageSize = 256;
73 /// Pages can grow only to the given limit in bytes.
74 std::size_t fMaxUnzippedPageSize = 1024 * 1024;
75 /// The maximum size that the sum of all page buffers used for writing into a persistent sink is allowed to use.
76 /// If set to zero, RNTuple will auto-adjust the budget based on the value of fApproxZippedClusterSize.
77 /// If set manually, the size needs to be large enough to hold all initial page buffers.
78 /// The total amount of memory for writing is larger, e.g. for the additional compressed buffers etc.
79 /// Use RNTupleModel::EstimateWriteMemoryUsage() for the total estimated memory use for writing.
80 /// The default values are tuned for a total write memory of around 300 MB per fill context.
81 std::size_t fPageBufferBudget = 0;
82 /// Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them
83 /// to keep pages of the same column adjacent, and coalesces the writes when committing a cluster.
84 bool fUseBufferedWrite = true;
85 /// Whether to use Direct I/O for writing. Note that this introduces alignment requirements that may vary between
86 /// filesystems and platforms.
87 bool fUseDirectIO = false;
88 /// Buffer size to use for writing to files, must be a multiple of 4096 bytes. Testing suggests that 4MiB gives best
89 /// performance (with Direct I/O) at a reasonable memory consumption.
90 std::size_t fWriteBufferSize = 4 * 1024 * 1024;
91 /// Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on.
93 /// If set, checksums will be calculated and written for every page.
95 /// If set, identical pages are deduplicated and aliased on disk. Requires page checksums.
97 /// Specifies the max size of a payload storable in a single TKey. When writing an RNTuple to a ROOT file,
98 /// any payload whose size exceeds this will be split into multiple keys.
100
101public:
102
103 virtual ~RNTupleWriteOptions() = default;
104 virtual std::unique_ptr<RNTupleWriteOptions> Clone() const;
105
106 std::uint32_t GetCompression() const { return fCompression; }
107 void SetCompression(std::uint32_t val) { fCompression = val; }
112
114 void SetApproxZippedClusterSize(std::size_t val);
115
117 void SetMaxUnzippedClusterSize(std::size_t val);
118
120 void SetInitialUnzippedPageSize(std::size_t val);
121
122 std::size_t GetMaxUnzippedPageSize() const { return fMaxUnzippedPageSize; }
123 void SetMaxUnzippedPageSize(std::size_t val);
124
125 std::size_t GetPageBufferBudget() const;
126 void SetPageBufferBudget(std::size_t val) { fPageBufferBudget = val; }
127
128 bool GetUseBufferedWrite() const { return fUseBufferedWrite; }
129 void SetUseBufferedWrite(bool val) { fUseBufferedWrite = val; }
130
131 bool GetUseDirectIO() const { return fUseDirectIO; }
132 void SetUseDirectIO(bool val) { fUseDirectIO = val; }
133
134 std::size_t GetWriteBufferSize() const { return fWriteBufferSize; }
135 void SetWriteBufferSize(std::size_t val) { fWriteBufferSize = val; }
136
139
141 /// Note that turning off page checksums will also turn off the same page merging optimization (see tuning.md)
143 {
147 }
148 }
149
151 void SetEnableSamePageMerging(bool val);
152
153 std::uint64_t GetMaxKeySize() const { return fMaxKeySize; }
154};
155
156namespace Internal {
158{
159 options.fMaxKeySize = maxKeySize;
160}
161} // namespace Internal
162
163} // namespace Experimental
164} // namespace ROOT
165
166#endif // ROOT7_RNTupleWriteOptions
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize)
Common user-tunable settings for storing ntuples.
std::size_t fPageBufferBudget
The maximum size that the sum of all page buffers used for writing into a persistent sink is allowed...
std::uint64_t fMaxKeySize
Specifies the max size of a payload storable in a single TKey.
std::size_t fWriteBufferSize
Buffer size to use for writing to files, must be a multiple of 4096 bytes.
bool fEnablePageChecksums
If set, checksums will be calculated and written for every page.
bool fUseDirectIO
Whether to use Direct I/O for writing.
virtual std::unique_ptr< RNTupleWriteOptions > Clone() const
std::size_t fMaxUnzippedPageSize
Pages can grow only to the given limit in bytes.
std::size_t fInitialUnzippedPageSize
Initially, columns start with a page of this size.
std::size_t fApproxZippedClusterSize
Approximation of the target compressed cluster size.
std::size_t fMaxUnzippedClusterSize
Memory limit for committing a cluster: with very high compression ratio, we need a limit on how large...
bool fUseBufferedWrite
Whether to use buffered writing (with RPageSinkBuf).
void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EImplicitMT fUseImplicitMT
Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is ...
void SetEnablePageChecksums(bool val)
Note that turning off page checksums will also turn off the same page merging optimization (see tunin...
static constexpr std::uint64_t kDefaultMaxKeySize
bool fEnableSamePageMerging
If set, identical pages are deduplicated and aliased on disk. Requires page checksums.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EValues
Note: this is only temporarily a struct and will become an enum class, hence the name convention used.
Definition Compression.h:88
@ kUseGeneralPurpose
Use the new recommended general-purpose setting; it is a best trade-off between compression ratio/dec...
Definition Compression.h:58