Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RSnapshotOptions.hxx
Go to the documentation of this file.
1// Author: Guilherme Amadio, Enrico Guiraud, Danilo Piparo CERN 2/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RSNAPSHOTOPTIONS
12#define ROOT_RSNAPSHOTOPTIONS
13
14#include <Compression.h>
15#include <string_view>
16#include <string>
17
18namespace ROOT {
19
20namespace RDF {
23 kTTree,
25};
26
27// clang-format off
28/**
29\struct ROOT::RDF::RSnapshotOptions
30\brief A collection of options to steer the creation of the dataset on disk through Snapshot().
31
32Some settings are output format-dependent. Please refer to the table below for an overview of all options, and to which
33output format they apply.
34
35Note that for RNTuple, the defaults correspond to those set in RNTupleWriteOptions.
36
37<table>
38<thead>
39<tr>
40<th>Format</th>
41<th>Option</th>
42<th>Type</th>
43<th>Default</th>
44<th>Description</th>
45</tr>
46</thead>
47<tbody>
48<tr>
49<td rowspan="8">All</td>
50<td><code>fMode</code></td>
51<td><code>std::string</code></td>
52<td>&quot;RECREATE&quot;</td>
53<td>Creation mode for the output TFile</td>
54</tr>
55<tr>
56<td><code>fCompressionAlgorithm</code></td>
57<td><code>ROOT::RCompressionSetting::EAlgorithm</code></td>
58<td>Zstd</td>
59<td>Compression algorithm for the output dataset</td>
60</tr>
61<tr>
62<td><code>fCompressionLevel</code></td>
63<td><code>int</code></td>
64<td>5</td>
65<td>Compression level for the output dataset</td>
66</tr>
67<tr>
68<td><code>fOutputFormat</code></td>
69<td><code>ROOT::RDF::ESnapshotOutputFormat</code></td>
70<td>TTree</td>
71<td>Which output data format to use</td>
72</tr>
73<tr>
74<td><code>fLazy</code></td>
75<td><code>bool</code></td>
76<td>False</td>
77<td>If true, do not start the event loop when Snapshot() is called; it runs only once the result is accessed</td>
78</tr>
79<tr>
80<td><code>fOverwriteIfExists</code></td>
81<td><code>bool</code></td>
82<td>False</td>
83<td>If <code>fMode</code> is &quot;UPDATE&quot;, overwrite an object with the same name if it is already present in the output file</td>
84</tr>
85<tr>
86<td><code>fVector2RVec</code></td>
87<td><code>bool</code></td>
88<td>True</td>
89<td>Store <code>std::vector</code>-type columns as <code>ROOT::RVec</code> in the output</td>
90</tr>
91<tr>
92<td><code>fIncludeVariations</code></td>
93<td><code>bool</code></td>
94<td>False</td>
95<td>Include columns that result from a Vary() action in the output</td>
96</tr>
97<tr>
98<td rowspan="3">TTree</td>
99<td><code>fAutoFlush</code></td>
100<td><code>int</code></td>
101<td>0</td>
102<td>AutoFlush setting for the output (see TTree::SetAutoFlush())</td>
103</tr>
104<tr>
105<td><code>fSplitLevel</code></td>
106<td><code>int</code></td>
107<td>99</td>
108<td>Split level of the output branches</td>
109</tr>
110<tr>
111<td><code>fBasketSize</code></td>
112<td><code>int</code></td>
113<td>-1</td>
114<td>Output basket size (a value of -1 means the TTree default of 32000 B is used)</td>
115</tr>
116<tr>
117<td rowspan="6">RNTuple</td>
118<td><code>fApproxZippedClusterSize</code></td>
119<td><code>std::size_t</code></td>
120<td>128 MiB</td>
121<td>Approximate output compressed cluster size</td>
122</tr>
123<tr>
124<td><code>fMaxUnzippedClusterSize</code></td>
125<td><code>std::size_t</code></td>
126<td>1280 MiB</td>
127<td>Maximum uncompressed output cluster size</td>
128</tr>
129<tr>
130<td><code>fInitialUnzippedPageSize</code></td>
131<td><code>std::size_t</code></td>
132<td>256 B</td>
133<td>Initial output page size before compression</td>
134</tr>
135<tr>
136<td><code>fMaxUnzippedPageSize</code></td>
137<td><code>std::size_t</code></td>
138<td>1 MiB</td>
139<td>Maximum allowed output page size before compression</td>
140</tr>
141<tr>
142<td><code>fEnablePageChecksums</code></td>
143<td><code>bool</code></td>
144<td>True</td>
145<td>Enable checksumming for output pages</td>
146</tr>
147<tr>
148<td><code>fEnableSamePageMerging</code></td>
149<td><code>bool</code></td>
150<td>True</td>
151<td>Enable identical-page deduplication (requires page checksumming to be enabled)</td>
152</tr>
153</tbody>
154</table>
155*/
156// clang-format on
159 RSnapshotOptions() = default;
185 std::string fMode = "RECREATE"; ///< Mode of creation of output file
188 ROOT::RCompressionSetting::EAlgorithm::kZSTD; ///< Compression algorithm of output file
189 int fCompressionLevel = 5; ///< Compression level of output file
190 bool fLazy = false; ///< Do not start the event loop when Snapshot is called
191 bool fOverwriteIfExists = false; ///< If fMode is "UPDATE", overwrite object in output file if it already exists
192 bool fVector2RVec = true; ///< If set to true will convert std::vector columns to RVec when saving to disk
193 bool fIncludeVariations = false; ///< Include columns that result from a Vary() action
194
195 /// *(TTree only)* AutoFlush value for output tree
196 int fAutoFlush = 0;
197 /// *(TTree only)* Split level of output tree
198 int fSplitLevel = 99;
199 /// *(TTree only)* Set a custom basket size option. For more details, see
200 /// https://root.cern/manual/trees/#baskets-clusters-and-the-tree-header
201 int fBasketSize = -1;
202
203 /// *(RNTuple only)* Approximate target compressed cluster size
204 std::size_t fApproxZippedClusterSize = 128 * 1024 * 1024;
205 /// *(RNTuple only)* Maximum uncompressed cluster size
207 /// *(RNTuple only)* Initial page size before compression
208 std::size_t fInitialUnzippedPageSize = 256;
209 /// *(RNTuple only)* Maximum allowed page size before compression
210 std::size_t fMaxUnzippedPageSize = 1024 * 1024;
211 /// *(RNTuple only)* Enable checksumming for pages
213 /// *(RNTuple only)* Enable identical-page deduplication. Requires page checksumming
215};
216} // namespace RDF
217} // namespace ROOT
218
219#endif
Option_t Option_t TPoint TPoint const char mode
@ kDefault
Definition TSystem.h:243
EValues
Note: this is only temporarily a struct and will become an enum class, hence the naming convention used.
Definition Compression.h:88
A collection of options to steer the creation of the dataset on disk through Snapshot().
bool fIncludeVariations
Include columns that result from a Vary() action.
std::size_t fMaxUnzippedPageSize
*(RNTuple only)* Maximum allowed page size before compression
int fAutoFlush
*(TTree only)* AutoFlush value for output tree
ESnapshotOutputFormat fOutputFormat
Which data format to write to.
bool fEnableSamePageMerging
*(RNTuple only)* Enable identical-page deduplication. Requires page checksumming
std::string fMode
Mode of creation of output file.
bool fVector2RVec
If set to true will convert std::vector columns to RVec when saving to disk.
RSnapshotOptions(std::string_view mode, ECAlgo comprAlgo, int comprLevel, int autoFlush, int splitLevel, bool lazy, bool overwriteIfExists=false, bool vector2RVec=true, int basketSize=-1, std::size_t approxZippedClusterSize=128 *1024 *1024, std::size_t maxUnzippedClusterSize=10 *128 *1024 *1024, std::size_t maxUnzippedPageSize=1024 *1024, std::size_t initUnzippedPageSize=256, bool enablePageChecksums=true, bool enableSamePageMerging=true, ESnapshotOutputFormat outputFormat=ESnapshotOutputFormat::kDefault)
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
std::size_t fInitialUnzippedPageSize
*(RNTuple only)* Initial page size before compression
bool fEnablePageChecksums
*(RNTuple only)* Enable checksumming for pages
std::size_t fApproxZippedClusterSize
*(RNTuple only)* Approximate target compressed cluster size
int fSplitLevel
*(TTree only)* Split level of output tree
std::size_t fMaxUnzippedClusterSize
*(RNTuple only)* Maximum uncompressed cluster size
int fBasketSize
*(TTree only)* Set a custom basket size option.
bool fLazy
Do not start the event loop when Snapshot is called.
int fCompressionLevel
Compression level of output file.
bool fOverwriteIfExists
If fMode is "UPDATE", overwrite object in output file if it already exists.