Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RMiniFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RMiniFile.hxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \date 2019-12-22
4
5/*************************************************************************
6 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12
13#ifndef ROOT_RMiniFile
14#define ROOT_RMiniFile
15
16#include <ROOT/RError.hxx>
17#include <ROOT/RNTuple.hxx>
19#include <ROOT/RSpan.hxx>
20#include <Compression.h>
21#include <string_view>
22
23#include <cstdint>
24#include <cstdio>
25#include <memory>
26#include <string>
27#include <variant>
28
29class TDirectory;
30class TFileMergeInfo;
32
33namespace ROOT {
34
36
37namespace Experimental {
38
39class RFile;
40
41}
42
43namespace Internal {
44
45class RRawFile;
46
47/// Holds status information of an open ROOT file during writing
49
50// clang-format off
51/**
52\class ROOT::Internal::RMiniFileReader
53\ingroup NTuple
54\brief Read RNTuple data blocks from a TFile container, provided by a RRawFile
55
56An RRawFile is used for the byte access. The class implements a minimal subset of TFile, enough to extract
57RNTuple data keys.
58*/
59// clang-format on
61private:
62 /// The raw file used to read byte ranges
64 /// Indicates whether the file is a TFile container or an RNTuple bare file
65 bool fIsBare = false;
66 /// If `fMaxKeySize > 0` and ReadBuffer attempts to read `nbytes > fMaxKeySize`, it will assume the
67 /// blob being read is chunked and read all the chunks into the buffer. This is symmetrical to
68 /// what happens in `RNTupleFileWriter::WriteBlob()`.
69 std::uint64_t fMaxKeySize = 0;
70
71 /// Used when the file container turns out to be a bare file
72 RResult<RNTuple> GetNTupleBare(std::string_view ntupleName);
73 /// Used when the file turns out to be a TFile container. The ntuplePath variable is either the ntuple name
74 /// or an ntuple name preceded by a directory (`myNtuple` or `foo/bar/myNtuple` or `/foo/bar/myNtuple`)
75 RResult<RNTuple> GetNTupleProper(std::string_view ntuplePath);
76
77 /// Searches for a key with the given name and type in the key index of the directory starting at offsetDir.
78 /// The offset points to the start of the TDirectory DATA section, without the key and without the name and title
79 /// of the TFile record (the root directory).
80 /// Returns 0 if the key was not found; otherwise returns the offset of the found key.
81 std::uint64_t SearchInDirectory(std::uint64_t &offsetDir, std::string_view keyName, std::string_view typeName);
82
83public:
84 RMiniFileReader() = default;
85 /// Uses the given raw file to read byte ranges
87 /// Extracts header and footer location for the RNTuple identified by ntupleName
88 RResult<RNTuple> GetNTuple(std::string_view ntupleName);
89 /// Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary).
91 GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen);
92 /// Reads a given byte range from the file into the provided memory buffer.
93 /// If `nbytes > fMaxKeySize` it will perform chunked read from multiple blobs,
94 /// whose addresses are listed at the end of the first chunk.
95 /// \throw ROOT::RException if the read fails.
96 void ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset);
97 /// Like ReadBuffer but returns a RResult instead of throwing.
98 ROOT::RResult<void> TryReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset);
99 /// Attempts to load the streamer info from the file.
100 void LoadStreamerInfo();
101
102 std::uint64_t GetMaxKeySize() const { return fMaxKeySize; }
103 /// If the reader is not used to retrieve the anchor, we need to set the max key size manually
104 void SetMaxKeySize(std::uint64_t maxKeySize) { fMaxKeySize = maxKeySize; }
105};
106
107// clang-format off
108/**
109\class ROOT::Internal::RNTupleFileWriter
110\ingroup NTuple
111\brief Write RNTuple data blocks in a TFile or a bare file container
112
113The writer can create a new TFile container for an RNTuple or add an RNTuple to an existing TFile.
114Creating a single RNTuple in a new TFile container can be done with a C file stream without a TFile class.
115Updating an existing TFile requires a proper TFile object. Also, writing a remote file requires a proper TFile object.
116A stand-alone version of RNTuple can remove the TFile-based writer.
117*/
118// clang-format on
120public:
121 /// The key length of a blob. It is always a big key (version > 1000) with class name RBlob.
122 static constexpr std::size_t kBlobKeyLen = 42;
123
124private:
125 struct RImplTFile {
126 /// A sub directory in fFile or nullptr if the data is stored in the root directory of the file
128 /// Low-level writing using a TFile
129 void Write(const void *buffer, size_t nbytes, std::int64_t offset);
130 /// Reserves an RBlob opaque key as data record and returns the offset of the record. If keyBuffer is specified,
131 /// it must be written *before* the returned offset. (Note that the array type is purely documentation, the
132 /// argument is actually just a pointer.)
133 std::uint64_t ReserveBlobKey(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr);
134 operator bool() const { return fDirectory; }
135 };
136
137 struct RImplRFile {
139 std::string fDir;
140 /// Low-level writing using an RFile
141 void Write(const void *buffer, size_t nbytes, std::int64_t offset);
142 /// Reserves an RBlob opaque key as data record and returns the offset of the record. If keyBuffer is specified,
143 /// it must be written *before* the returned offset. (Note that the array type is purely documentation, the
144 /// argument is actually just a pointer.)
145 std::uint64_t ReserveBlobKey(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr);
146 operator bool() const { return fFile; }
147 };
148
149 struct RImplSimple {
150 /// Direct I/O requires that all buffers and write lengths are aligned. It seems 512 byte alignment is the minimum
151 /// for Direct I/O to work, but further testing showed that it results in worse performance than 4kB.
152 static constexpr int kBlockAlign = 4096;
153 /// During commit, WriteTFileKeysList() updates fNBytesKeys and fSeekKeys of the RTFFile located at
154 /// fSeekFileRecord. Given that the TFile key starts at offset 100 and the file name, which is written twice,
155 /// is shorter than 255 characters, we should need at most ~600 bytes. However, the header also needs to be
156 /// aligned to kBlockAlign...
157 static constexpr std::size_t kHeaderBlockSize = 4096;
158
159 // fHeaderBlock and fBlock are raw pointers because we have to manually call operator new and delete.
160 unsigned char *fHeaderBlock = nullptr;
161 std::size_t fBlockSize = 0;
162 std::uint64_t fBlockOffset = 0;
163 unsigned char *fBlock = nullptr;
164
165 /// For the simplest cases, a C file stream can be used for writing
166 FILE *fFile = nullptr;
167 /// Whether the C file stream has been opened with Direct I/O, introducing alignment requirements.
168 bool fDirectIO = false;
169 /// Keeps track of the seek offset
170 std::uint64_t fFilePos = 0;
171 /// Keeps track of the next key offset
172 std::uint64_t fKeyOffset = 0;
173 /// Keeps track of TFile control structures, which need to be updated on committing the data set
174 std::unique_ptr<ROOT::Internal::RTFileControlBlock> fControlBlock;
175
177 RImplSimple(const RImplSimple &other) = delete;
178 RImplSimple(RImplSimple &&other) = delete;
179 RImplSimple &operator=(const RImplSimple &other) = delete;
181 ~RImplSimple();
182
183 void AllocateBuffers(std::size_t bufferSize);
184 void Flush();
185
186 /// Writes bytes in the open stream, either at fFilePos or at the given offset
187 void Write(const void *buffer, size_t nbytes, std::int64_t offset = -1);
188 /// Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the payload.
189 /// The payload is already compressed
190 std::uint64_t WriteKey(const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset = -1,
191 std::uint64_t directoryOffset = 100, const std::string &className = "",
192 const std::string &objectName = "", const std::string &title = "");
193 /// Reserves an RBlob opaque key as data record and returns the offset of the record. If keyBuffer is specified,
194 /// it must be written *before* the returned offset. (Note that the array type is purely documentation, the
195 /// argument is actually just a pointer.)
196 std::uint64_t ReserveBlobKey(std::size_t nbytes, std::size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr);
197 operator bool() const { return fFile; }
198 };
199
200 template <typename T>
201 static std::uint64_t
202 ReserveBlobKey(T &caller, TFile &file, std::size_t nbytes, std::size_t len, unsigned char keyBuffer[kBlobKeyLen]);
203
204 /// RImplSimple: for simple use cases, survives without libRIO dependency
205 /// RImplTFile: for updating existing files and for storing more than just an RNTuple in the file
206 /// RImplRFile: like RImplTFile but using RFile instead of TFile.
207 using FileType_t = std::variant<RImplSimple, RImplTFile, RImplRFile>;
209
210 /// A simple file can either be written as TFile container or as NTuple bare file
211 bool fIsBare = false;
212 /// True if this RNTuple's anchor must be stored as a hidden key (this is the case e.g. for attribute RNTuples).
213 bool fIsHidden = false;
214 /// The identifier of the RNTuple; A single writer object can only write a single RNTuple but multiple
215 /// writers can operate on the same file if (and only if) they use a proper TFile object for writing.
216 std::string fNTupleName;
217 /// The file name without parent directory; only required when writing with a C file stream
218 std::string fFileName;
219 /// Header and footer location of the ntuple, written on Commit()
221 /// Set of streamer info records that should be written to the file.
222 /// The RNTuple class description is always present.
224
225 explicit RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize, bool isHidden);
226
227 /// For a TFile container written by a C file stream, write the header and TFile object
228 void WriteTFileSkeleton(int defaultCompression);
229 /// The only key that will be visible in file->ls().
230 /// Returns the size on disk of the anchor object.
231 std::uint64_t WriteTFileNTupleKey(int compression);
232 /// Write the TList with the RNTuple key
233 void WriteTFileKeysList(std::uint64_t anchorSize);
234 /// Write the compressed streamer info record with the description of the RNTuple class
235 void WriteTFileStreamerInfo(int compression);
236 /// Last record in the file
237 void WriteTFileFreeList();
238 /// For a bare file, which is necessarily written by a C file stream, write file header
239 void WriteBareFileSkeleton(int defaultCompression);
240
241public:
242 /// For testing purposes, RNTuple data can be written into a bare file container instead of a ROOT file
243 enum class EContainerFormat {
244 kTFile, // ROOT TFile
245 kBare, // A thin envelope supporting a single RNTuple only
246 };
247
248 /// Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
249 /// Uses a C stream for writing
250 static std::unique_ptr<RNTupleFileWriter> Recreate(std::string_view ntupleName, std::string_view path,
251 EContainerFormat containerFormat,
252 const ROOT::RNTupleWriteOptions &options);
253 /// The directory parameter can also be a TFile object (TFile inherits from TDirectory).
254 static std::unique_ptr<RNTupleFileWriter>
255 Append(std::string_view ntupleName, TDirectory &fileOrDirectory, std::uint64_t maxKeySize, bool isHidden);
256
257 static std::unique_ptr<RNTupleFileWriter> Append(std::string_view ntupleName, ROOT::Experimental::RFile &file,
258 std::string_view dirPath, std::uint64_t maxKeySize);
259
260 RNTupleFileWriter(const RNTupleFileWriter &other) = delete;
265
266 /// Creates a new RNTupleFileWriter with the same underlying TDirectory as this but writing to a different
267 /// RNTuple named `ntupleName`. Only one of the two writers can safely write to the file at the same time.
268 /// The RNTuple written by this cloned writer will be stored in a hidden key (this is a convenient assumption we
269 /// make now since this method is only used to create attribute RNTuples).
270 /// This method is currently only supported for TFile-based Writers and will throw an exception if that's not the
271 /// case.
272 std::unique_ptr<RNTupleFileWriter> CloneAsHidden(std::string_view ntupleName) const;
273
274 /// Seek a simple writer to offset. Note that previous data is not flushed immediately, but only by the next write
275 /// (if necessary).
276 void Seek(std::uint64_t offset);
277
278 /// Writes the compressed header and registers its location; lenHeader is the size of the uncompressed header.
279 std::uint64_t WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader);
280 /// Writes the compressed footer and registers its location; lenFooter is the size of the uncompressed footer.
281 std::uint64_t WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter);
282 /// Writes a new record as an RBlob key into the file
283 std::uint64_t WriteBlob(const void *data, size_t nbytes, size_t len);
284
285 /// Prepares buffer for a new record as an RBlob key at offset.
286 /// (Note that the array type is purely documentation, the argument is actually just a pointer.)
287 static void PrepareBlobKey(std::int64_t offset, size_t nbytes, size_t len, unsigned char buffer[kBlobKeyLen]);
288
289 /// Reserves a new record as an RBlob key in the file. If keyBuffer is specified, it must be written *before* the
290 /// returned offset. (Note that the array type is purely documentation, the argument is actually just a pointer.)
291 std::uint64_t ReserveBlob(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr);
292 /// Write into a reserved record; the caller is responsible for making sure that the written byte range is in the
293 /// previously reserved key.
294 void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset);
295 /// Ensures that the streamer info records passed as argument are written to the file
297
298 /// Writes the RNTuple key to the file so that the header and footer keys can be found.
299 /// \return information about the committed anchor.
301
302 std::string_view GetNTupleName() const { return fNTupleName; }
303};
304
305} // namespace Internal
306} // namespace ROOT
307
308#endif
char name[80]
Definition TGX11.cxx:148
RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize, bool isHidden)
An interface to read from, or write to, a ROOT file, as well as performing other common operations.
Definition RFile.hxx:252
void ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset)
Reads a given byte range from the file into the provided memory buffer.
void LoadStreamerInfo()
Attempts to load the streamer info from the file.
std::uint64_t GetMaxKeySize() const
void SetMaxKeySize(std::uint64_t maxKeySize)
If the reader is not used to retrieve the anchor, we need to set the max key size manually.
RResult< RNTuple > GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen)
Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary).
RResult< RNTuple > GetNTupleBare(std::string_view ntupleName)
Used when the file container turns out to be a bare file.
ROOT::RResult< void > TryReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset)
Like ReadBuffer but returns a RResult instead of throwing.
std::uint64_t SearchInDirectory(std::uint64_t &offsetDir, std::string_view keyName, std::string_view typeName)
Searches for a key with the given name and type in the key index of the directory starting at offsetD...
ROOT::Internal::RRawFile * fRawFile
The raw file used to read byte ranges.
Definition RMiniFile.hxx:63
RResult< RNTuple > GetNTuple(std::string_view ntupleName)
Extracts header and footer location for the RNTuple identified by ntupleName.
RResult< RNTuple > GetNTupleProper(std::string_view ntuplePath)
Used when the file turns out to be a TFile container.
bool fIsBare
Indicates whether the file is a TFile container or an RNTuple bare file.
Definition RMiniFile.hxx:65
std::uint64_t fMaxKeySize
If fMaxKeySize > 0 and ReadBuffer attempts to read nbytes > maxKeySize, it will assume the blob being...
Definition RMiniFile.hxx:69
std::uint64_t ReserveBlob(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen]=nullptr)
Reserves a new record as an RBlob key in the file.
std::variant< RImplSimple, RImplTFile, RImplRFile > FileType_t
RImplSimple: for simple use cases, survives without libRIO dependency RImplTFile: for updating existi...
void WriteTFileStreamerInfo(int compression)
Write the compressed streamer info record with the description of the RNTuple class.
std::string_view GetNTupleName() const
std::string fNTupleName
The identifier of the RNTuple; A single writer object can only write a single RNTuple but multiple wr...
void WriteTFileKeysList(std::uint64_t anchorSize)
Write the TList with the RNTuple key.
std::uint64_t WriteTFileNTupleKey(int compression)
The only key that will be visible in file->ls() Returns the size on disk of the anchor object.
RNTupleFileWriter(const RNTupleFileWriter &other)=delete
void WriteBareFileSkeleton(int defaultCompression)
For a bare file, which is necessarily written by a C file stream, write file header.
RNTupleFileWriter & operator=(const RNTupleFileWriter &other)=delete
RNTupleFileWriter(RNTupleFileWriter &&other)=delete
std::uint64_t WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader)
Writes the compressed header and registers its location; lenHeader is the size of the uncompressed h...
std::string fFileName
The file name without parent directory; only required when writing with a C file stream.
static std::uint64_t ReserveBlobKey(T &caller, TFile &file, std::size_t nbytes, std::size_t len, unsigned char keyBuffer[kBlobKeyLen])
static std::unique_ptr< RNTupleFileWriter > Append(std::string_view ntupleName, TDirectory &fileOrDirectory, std::uint64_t maxKeySize, bool isHidden)
The directory parameter can also be a TFile object (TFile inherits from TDirectory).
void WriteTFileFreeList()
Last record in the file.
std::unique_ptr< RNTupleFileWriter > CloneAsHidden(std::string_view ntupleName) const
Creates a new RNTupleFileWriter with the same underlying TDirectory as this but writing to a differen...
void WriteTFileSkeleton(int defaultCompression)
For a TFile container written by a C file stream, write the header and TFile object.
RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize, bool isHidden)
void Seek(std::uint64_t offset)
Seek a simple writer to offset.
bool fIsBare
A simple file can either be written as TFile container or as NTuple bare file.
ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t fStreamerInfoMap
Set of streamer info records that should be written to the file.
RNTupleFileWriter & operator=(RNTupleFileWriter &&other)=delete
std::uint64_t WriteBlob(const void *data, size_t nbytes, size_t len)
Writes a new record as an RBlob key into the file.
static std::unique_ptr< RNTupleFileWriter > Recreate(std::string_view ntupleName, std::string_view path, EContainerFormat containerFormat, const ROOT::RNTupleWriteOptions &options)
Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset)
Write into a reserved record; the caller is responsible for making sure that the written byte range i...
static constexpr std::size_t kBlobKeyLen
The key length of a blob. It is always a big key (version > 1000) with class name RBlob.
RNTupleLink Commit(int compression=RCompressionSetting::EDefaults::kUseGeneralPurpose)
Writes the RNTuple key to the file so that the header and footer keys can be found.
static void PrepareBlobKey(std::int64_t offset, size_t nbytes, size_t len, unsigned char buffer[kBlobKeyLen])
Prepares buffer for a new record as an RBlob key at offset.
std::uint64_t WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter)
Writes the compressed footer and registers its location; lenFooter is the size of the uncompressed f...
bool fIsHidden
True if this RNTuple's anchor must be stored as a hidden key (this is the case e.g....
void UpdateStreamerInfos(const ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t &streamerInfos)
Ensures that the streamer info records passed as argument are written to the file.
RNTuple fNTupleAnchor
Header and footer location of the ntuple, written on Commit().
EContainerFormat
For testing purposes, RNTuple data can be written into a bare file container instead of a ROOT file.
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
Common user-tunable settings for storing RNTuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
Describe directory structure in memory.
Definition TDirectory.h:45
A class to pass information from the TFileMerger to the objects being merged.
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:130
Abstract Interface class describing Streamer information for one class.
Namespace for ROOT features in testing.
Definition TROOT.h:100
void Write(const void *buffer, size_t nbytes, std::int64_t offset)
Low-level writing using a TFile.
std::uint64_t ReserveBlobKey(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen]=nullptr)
Reserves an RBlob opaque key as data record and returns the offset of the record.
void AllocateBuffers(std::size_t bufferSize)
bool fDirectIO
Whether the C file stream has been opened with Direct I/O, introducing alignment requirements.
RImplSimple & operator=(RImplSimple &&other)=delete
std::uint64_t WriteKey(const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset=-1, std::uint64_t directoryOffset=100, const std::string &className="", const std::string &objectName="", const std::string &title="")
Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the ...
RImplSimple & operator=(const RImplSimple &other)=delete
static constexpr int kBlockAlign
Direct I/O requires that all buffers and write lengths are aligned.
std::unique_ptr< ROOT::Internal::RTFileControlBlock > fControlBlock
Keeps track of TFile control structures, which need to be updated on committing the data set.
void Write(const void *buffer, size_t nbytes, std::int64_t offset=-1)
Writes bytes in the open stream, either at fFilePos or at the given offset.
static constexpr std::size_t kHeaderBlockSize
During commit, WriteTFileKeysList() updates fNBytesKeys and fSeekKeys of the RTFFile located at fSeek...
std::uint64_t fFilePos
Keeps track of the seek offset.
RImplSimple(const RImplSimple &other)=delete
FILE * fFile
For the simplest cases, a C file stream can be used for writing.
std::uint64_t ReserveBlobKey(std::size_t nbytes, std::size_t len, unsigned char keyBuffer[kBlobKeyLen]=nullptr)
Reserves an RBlob opaque key as data record and returns the offset of the record.
std::uint64_t fKeyOffset
Keeps track of the next key offset.
TDirectory * fDirectory
A sub directory in fFile or nullptr if the data is stored in the root directory of the file.
void Write(const void *buffer, size_t nbytes, std::int64_t offset)
Low-level writing using a TFile.
std::uint64_t ReserveBlobKey(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen]=nullptr)
Reserves an RBlob opaque key as data record and returns the offset of the record.
If a TFile container is written by a C stream (simple file), on dataset commit, the file header and t...
@ kUseGeneralPurpose
Use the new recommended general-purpose setting; it is a best trade-off between compression ratio/dec...
Definition Compression.h:58