Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleOptions.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleOptions.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-08-25
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleOptions
17#define ROOT7_RNTupleOptions
18
19#include <Compression.h>
20#include <ROOT/RNTupleUtil.hxx>
21
22#include <memory>
23
24namespace ROOT {
25namespace Experimental {
26
27// clang-format off
28/**
29\class ROOT::Experimental::ENTupleContainerFormat
30\ingroup NTuple
31\brief Describes the options for wrapping RNTuple data in files
32*/
33// clang-format on
35 kTFile, // ROOT TFile
36 kBare, // A thin envelope supporting a single RNTuple only
37};
38
39
40// clang-format off
41/**
42\class ROOT::Experimental::RNTupleWriteOptions
43\ingroup NTuple
44\brief Common user-tunable settings for storing ntuples
45
46All page sink classes need to support the common options.
47*/
48// clang-format on
50protected:
53 /// Approximation of the target compressed cluster size
54 std::size_t fApproxZippedClusterSize = 50 * 1000 * 1000;
55 /// Memory limit for committing a cluster: with very high compression ratio, we need a limit
56 /// on how large the I/O buffer can grow during writing.
57 std::size_t fMaxUnzippedClusterSize = 512 * 1024 * 1024;
58 /// Should be just large enough so that the compression ratio does not benefit much more from larger pages.
59 /// Unless the cluster is too small to contain a sufficiently large page, pages are
60 /// fApproxUnzippedPageSize in size and tail pages (the last page in a cluster) are between
61 /// fApproxUnzippedPageSize/2 and fApproxUnzippedPageSize * 1.5 in size.
62 std::size_t fApproxUnzippedPageSize = 64 * 1024;
63 bool fUseBufferedWrite = true;
64 /// If set, 64bit index columns are replaced by 32bit index columns. This limits the cluster size to 512MB
65 /// but it can result in smaller file sizes for data sets with many collections and lz4 or no compression.
66 bool fHasSmallClusters = false;
67
68public:
69 /// A maximum size of 512MB still allows for a vector of bool to be stored in a small cluster. This is the
70 /// worst case wrt. the maximum required size of the index column. A 32bit index column can address 512MB
71 /// of 1-bit (on disk size) bools.
72 static constexpr std::uint64_t kMaxSmallClusterSize = 512 * 1024 * 1024;
73
74 virtual ~RNTupleWriteOptions() = default;
75 virtual std::unique_ptr<RNTupleWriteOptions> Clone() const;
76
77 int GetCompression() const { return fCompression; }
78 void SetCompression(int val) { fCompression = val; }
79 void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel) {
80 fCompression = CompressionSettings(algorithm, compressionLevel);
81 }
82
85
87 void SetApproxZippedClusterSize(std::size_t val);
88
89 std::size_t GetMaxUnzippedClusterSize() const { return fMaxUnzippedClusterSize; }
90 void SetMaxUnzippedClusterSize(std::size_t val);
91
92 std::size_t GetApproxUnzippedPageSize() const { return fApproxUnzippedPageSize; }
93 void SetApproxUnzippedPageSize(std::size_t val);
94
95 bool GetUseBufferedWrite() const { return fUseBufferedWrite; }
96 void SetUseBufferedWrite(bool val) { fUseBufferedWrite = val; }
97
98 bool GetHasSmallClusters() const { return fHasSmallClusters; }
99 void SetHasSmallClusters(bool val) { fHasSmallClusters = val; }
100};
101
102// clang-format off
103/**
104\class ROOT::Experimental::RNTupleWriteOptionsDaos
105\ingroup NTuple
106\brief DAOS-specific user-tunable settings for storing ntuples
107*/
108// clang-format on
110 std::string fObjectClass{"SX"};
111 /// The maximum cage size is set to the equivalent of 16 uncompressed pages - 1MiB by default. Empirically, such a
112 /// cage size yields acceptable results in throughput and page granularity for most use cases. A `fMaxCageSize` of 0
113 /// disables the caging mechanism.
115
116public:
117 ~RNTupleWriteOptionsDaos() override = default;
118 std::unique_ptr<RNTupleWriteOptions> Clone() const override
119 { return std::make_unique<RNTupleWriteOptionsDaos>(*this); }
120
121 const std::string &GetObjectClass() const { return fObjectClass; }
122 /// Set the object class used to generate OIDs that relate to user data. Any
123 /// `OC_xxx` constant defined in `daos_obj_class.h` may be used here without
124 /// the OC_ prefix.
125 void SetObjectClass(const std::string &val) { fObjectClass = val; }
126
127 uint32_t GetMaxCageSize() const { return fMaxCageSize; }
128 /// Set the upper bound for page concatenation into cages, in bytes. It is assumed
129 /// that the cage size will be no smaller than the approximate uncompressed page size.
130 /// To disable page concatenation, set this value to 0.
131 void SetMaxCageSize(uint32_t cageSz) { fMaxCageSize = cageSz; }
132};
133
134// clang-format off
135/**
136\class ROOT::Experimental::RNTupleReadOptions
137\ingroup NTuple
138\brief Common user-tunable settings for reading ntuples
139
140All page source classes need to support the common options.
141*/
142// clang-format on
144public:
149 };
150
151private:
153 unsigned int fClusterBunchSize = 1;
154
155public:
158 unsigned int GetClusterBunchSize() const { return fClusterBunchSize; }
159 void SetClusterBunchSize(unsigned int val) { fClusterBunchSize = val; }
160};
161
162} // namespace Experimental
163} // namespace ROOT
164
165#endif
Common user-tunable settings for reading ntuples.
DAOS-specific user-tunable settings for storing ntuples.
void SetObjectClass(const std::string &val)
Set the object class used to generate OIDs that relate to user data.
std::unique_ptr< RNTupleWriteOptions > Clone() const override
void SetMaxCageSize(uint32_t cageSz)
Set the upper bound for page concatenation into cages, in bytes.
uint32_t fMaxCageSize
The maximum cage size is set to the equivalent of 16 uncompressed pages - 1MiB by default.
Common user-tunable settings for storing ntuples.
bool fHasSmallClusters
If set, 64bit index columns are replaced by 32bit index columns.
void SetContainerFormat(ENTupleContainerFormat val)
static constexpr std::uint64_t kMaxSmallClusterSize
A maximum size of 512MB still allows for a vector of bool to be stored in a small cluster.
void SetApproxZippedClusterSize(std::size_t val)
virtual std::unique_ptr< RNTupleWriteOptions > Clone() const
std::size_t fApproxZippedClusterSize
Approximation of the target compressed cluster size.
std::size_t fMaxUnzippedClusterSize
Memory limit for committing a cluster: with very high compression ratio, we need a limit on how large...
std::size_t fApproxUnzippedPageSize
Should be just large enough so that the compression ratio does not benefit much more from larger page...
void SetApproxUnzippedPageSize(std::size_t val)
ENTupleContainerFormat GetContainerFormat() const
void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
void SetMaxUnzippedClusterSize(std::size_t val)
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
EValues
Note: this is only temporarily a struct and will become an enum class, hence the name.
Definition Compression.h:85
@ kUseAnalysis
Use the default analysis setting; fast reading but poor compression ratio.
Definition Compression.h:54