Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RMiniFile.cxx
Go to the documentation of this file.
1/// \file RMiniFile.cxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \date 2019-12-22
4
5/*************************************************************************
6 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12
13#include "Rtypes.h"
14#include <ROOT/RConfig.hxx>
15#include <ROOT/RError.hxx>
16#include <ROOT/RMiniFile.hxx>
17#include <ROOT/RRawFile.hxx>
18#include <ROOT/RNTupleUtils.hxx>
19#include <ROOT/RNTupleZip.hxx>
22#include <ROOT/RFile.hxx>
23
24#include <Byteswap.h>
25#include <TBufferFile.h>
26#include <TDirectory.h>
27#include <TError.h>
28#include <TFile.h>
29#include <TKey.h>
30#include <TObjString.h>
31#include <TUUID.h>
32#include <TStreamerInfo.h>
33
34#include <xxhash.h>
35
36#include <algorithm>
37#include <cassert>
38#include <cerrno>
39#include <cstdio>
40#include <cstring>
41#include <memory>
42#include <string>
43#include <chrono>
44
45#ifdef R__LINUX
46#include <fcntl.h>
47#endif
48
49#ifndef R__LITTLE_ENDIAN
50#ifdef R__BYTESWAP
51// `R__BYTESWAP` is defined in RConfig.hxx for little-endian architectures; undefined otherwise
52#define R__LITTLE_ENDIAN 1
53#else
54#define R__LITTLE_ENDIAN 0
55#endif
56#endif /* R__LITTLE_ENDIAN */
57
62
63namespace {
64
65// The following types are used to read and write the TFile binary format
66
/// A 16-bit unsigned integer whose in-memory representation is big-endian.
/// Construction and assignment take host-order values; the conversion operator hands back a host-order value.
class RUInt16BE {
   /// The stored representation (byte-swapped relative to the host when R__LITTLE_ENDIAN is 1)
   std::uint16_t fValBE{0};

   /// Converts between host order and the stored order; applying it twice yields the input again
   static std::uint16_t Swap(std::uint16_t hostOrStored)
   {
#if R__LITTLE_ENDIAN == 1
      return RByteSwap<sizeof(std::uint16_t)>::bswap(hostOrStored);
#else
      return hostOrStored;
#endif
   }

public:
   RUInt16BE() = default;
   explicit RUInt16BE(const std::uint16_t val) : fValBE{Swap(val)} {}

   RUInt16BE &operator=(const std::uint16_t val)
   {
      fValBE = Swap(val);
      return *this;
   }

   /// Reads the value back in host byte order
   operator std::uint16_t() const { return Swap(fValBE); }
};
90
/// A 32-bit unsigned integer whose in-memory representation is big-endian.
/// Construction and assignment take host-order values; the conversion operator hands back a host-order value.
class RUInt32BE {
   /// The stored representation (byte-swapped relative to the host when R__LITTLE_ENDIAN is 1)
   std::uint32_t fValBE{0};

   /// Converts between host order and the stored order; applying it twice yields the input again
   static std::uint32_t Swap(std::uint32_t hostOrStored)
   {
#if R__LITTLE_ENDIAN == 1
      return RByteSwap<sizeof(std::uint32_t)>::bswap(hostOrStored);
#else
      return hostOrStored;
#endif
   }

public:
   RUInt32BE() = default;
   explicit RUInt32BE(const std::uint32_t val) : fValBE{Swap(val)} {}

   RUInt32BE &operator=(const std::uint32_t val)
   {
      fValBE = Swap(val);
      return *this;
   }

   /// Reads the value back in host byte order
   operator std::uint32_t() const { return Swap(fValBE); }
};
114
/// A 32-bit signed integer whose in-memory representation is big-endian.
/// Construction and assignment take host-order values; the conversion operator hands back a host-order value.
class RInt32BE {
   /// The stored representation (byte-swapped relative to the host when R__LITTLE_ENDIAN is 1)
   std::int32_t fValBE{0};

   /// Converts between host order and the stored order; applying it twice yields the input again
   static std::int32_t Swap(std::int32_t hostOrStored)
   {
#if R__LITTLE_ENDIAN == 1
      return RByteSwap<sizeof(std::int32_t)>::bswap(hostOrStored);
#else
      return hostOrStored;
#endif
   }

public:
   RInt32BE() = default;
   explicit RInt32BE(const std::int32_t val) : fValBE{Swap(val)} {}

   RInt32BE &operator=(const std::int32_t val)
   {
      fValBE = Swap(val);
      return *this;
   }

   /// Reads the value back in host byte order
   operator std::int32_t() const { return Swap(fValBE); }
};
138
/// A 64-bit unsigned integer whose in-memory representation is big-endian.
/// Construction and assignment take host-order values; the conversion operator hands back a host-order value.
class RUInt64BE {
   /// The stored representation (byte-swapped relative to the host when R__LITTLE_ENDIAN is 1)
   std::uint64_t fValBE{0};

   /// Converts between host order and the stored order; applying it twice yields the input again
   static std::uint64_t Swap(std::uint64_t hostOrStored)
   {
#if R__LITTLE_ENDIAN == 1
      return RByteSwap<sizeof(std::uint64_t)>::bswap(hostOrStored);
#else
      return hostOrStored;
#endif
   }

public:
   RUInt64BE() = default;
   explicit RUInt64BE(const std::uint64_t val) : fValBE{Swap(val)} {}

   RUInt64BE &operator=(const std::uint64_t val)
   {
      fValBE = Swap(val);
      return *this;
   }

   /// Reads the value back in host byte order
   operator std::uint64_t() const { return Swap(fValBE); }
};
162
163#pragma pack(push, 1)
164/// A name (type, identifies, ...) in the TFile binary format
165struct RTFString {
166 unsigned char fLName{0};
167 char fData[255];
168 RTFString() = default;
169 RTFString(const std::string &str)
170 {
171 // The length of strings with 255 characters and longer are encoded with a 32-bit integer following the first
172 // byte. This is currently not handled.
173 R__ASSERT(str.length() < 255);
174 fLName = static_cast<unsigned char>(str.length());
175 memcpy(fData, str.data(), fLName);
176 }
177 std::size_t GetSize() const
178 {
179 // A length of 255 is special and means that the first byte is followed by a 32-bit integer with the actual
180 // length.
181 R__ASSERT(fLName != 255);
182 return 1 + fLName;
183 }
184};
185
186/// The timestamp format used in TFile; the default constructor initializes with the current time
187struct RTFDatetime {
188 RUInt32BE fDatetime;
189 RTFDatetime()
190 {
191 auto now = std::chrono::system_clock::now();
192 auto tt = std::chrono::system_clock::to_time_t(now);
193 auto tm = *localtime(&tt);
194 fDatetime = (tm.tm_year + 1900 - 1995) << 26 | (tm.tm_mon + 1) << 22 | tm.tm_mday << 17 | tm.tm_hour << 12 |
195 tm.tm_min << 6 | tm.tm_sec;
196 }
197 explicit RTFDatetime(RUInt32BE val) : fDatetime(val) {}
198};
199
200/// The key part of a TFile record excluding the class, object, and title names
201struct RTFKey {
202 static constexpr unsigned kBigKeyVersion = 1000;
203
204 RInt32BE fNbytes{0};
205 RUInt16BE fVersion{4};
206 RUInt32BE fObjLen{0};
207 RTFDatetime fDatetime;
208 RUInt16BE fKeyLen{0};
209 RUInt16BE fCycle{1};
210 union {
211 struct {
212 RUInt32BE fSeekKey{0};
213 RUInt32BE fSeekPdir{0};
214 } fInfoShort;
215 struct {
216 RUInt64BE fSeekKey{0};
217 RUInt64BE fSeekPdir{0};
218 } fInfoLong;
219 };
220
221 RTFKey() : fInfoLong() {}
222 RTFKey(std::uint64_t seekKey, std::uint64_t seekPdir, const RTFString &clName, const RTFString &objName,
223 const RTFString &titleName, std::size_t szObjInMem, std::size_t szObjOnDisk = 0)
224 {
225 R__ASSERT(szObjInMem <= std::numeric_limits<std::uint32_t>::max());
226 R__ASSERT(szObjOnDisk <= std::numeric_limits<std::uint32_t>::max());
227 // For writing, we alywas produce "big" keys with 64-bit SeekKey and SeekPdir.
228 fVersion = fVersion + kBigKeyVersion;
229 fObjLen = szObjInMem;
230 fKeyLen = static_cast<RUInt16BE>(GetHeaderSize() + clName.GetSize() + objName.GetSize() + titleName.GetSize());
231 fInfoLong.fSeekKey = seekKey;
232 fInfoLong.fSeekPdir = seekPdir;
233 // Depends on fKeyLen being set
234 fNbytes = fKeyLen + ((szObjOnDisk == 0) ? szObjInMem : szObjOnDisk);
235 }
236
237 std::uint32_t GetSize() const
238 {
239 // Negative size indicates a gap in the file
240 if (fNbytes < 0)
241 return -fNbytes;
242 return fNbytes;
243 }
244
245 std::uint32_t GetHeaderSize() const
246 {
247 if (fVersion >= kBigKeyVersion)
248 return 18 + sizeof(fInfoLong);
249 return 18 + sizeof(fInfoShort);
250 }
251
252 std::uint64_t GetSeekKey() const
253 {
254 if (fVersion >= kBigKeyVersion)
255 return fInfoLong.fSeekKey;
256 return fInfoShort.fSeekKey;
257 }
258};
259
260/// The TFile global header
261struct RTFHeader {
262 static constexpr unsigned kBEGIN = 100;
263 static constexpr unsigned kBigHeaderVersion = 1000000;
264
265 char fMagic[4]{'r', 'o', 'o', 't'};
266 RUInt32BE fVersion{(ROOT_VERSION_CODE >> 16) * 10000 + ((ROOT_VERSION_CODE & 0xFF00) >> 8) * 100 +
267 (ROOT_VERSION_CODE & 0xFF)};
268 RUInt32BE fBEGIN{kBEGIN};
269 union {
270 struct {
271 RUInt32BE fEND{0};
272 RUInt32BE fSeekFree{0};
273 RUInt32BE fNbytesFree{0};
274 RUInt32BE fNfree{1};
275 RUInt32BE fNbytesName{0};
276 unsigned char fUnits{4};
277 RUInt32BE fCompress{0};
278 RUInt32BE fSeekInfo{0};
279 RUInt32BE fNbytesInfo{0};
280 } fInfoShort;
281 struct {
282 RUInt64BE fEND{0};
283 RUInt64BE fSeekFree{0};
284 RUInt32BE fNbytesFree{0};
285 RUInt32BE fNfree{1};
286 RUInt32BE fNbytesName{0};
287 unsigned char fUnits{8};
288 RUInt32BE fCompress{0};
289 RUInt64BE fSeekInfo{0};
290 RUInt32BE fNbytesInfo{0};
291 } fInfoLong;
292 };
293
294 RTFHeader() : fInfoShort() {}
295 RTFHeader(int compression) : fInfoShort() { fInfoShort.fCompress = compression; }
296
297 void SetBigFile()
298 {
299 if (fVersion >= kBigHeaderVersion)
300 return;
301
302 // clang-format off
303 std::uint32_t end = fInfoShort.fEND;
304 std::uint32_t seekFree = fInfoShort.fSeekFree;
305 std::uint32_t nbytesFree = fInfoShort.fNbytesFree;
306 std::uint32_t nFree = fInfoShort.fNfree;
307 std::uint32_t nbytesName = fInfoShort.fNbytesName;
308 std::uint32_t compress = fInfoShort.fCompress;
309 std::uint32_t seekInfo = fInfoShort.fSeekInfo;
310 std::uint32_t nbytesInfo = fInfoShort.fNbytesInfo;
311 fInfoLong.fEND = end;
312 fInfoLong.fSeekFree = seekFree;
313 fInfoLong.fNbytesFree = nbytesFree;
314 fInfoLong.fNfree = nFree;
315 fInfoLong.fNbytesName = nbytesName;
316 fInfoLong.fUnits = 8;
317 fInfoLong.fCompress = compress;
318 fInfoLong.fSeekInfo = seekInfo;
319 fInfoLong.fNbytesInfo = nbytesInfo;
320 fVersion = fVersion + kBigHeaderVersion;
321 // clang-format on
322 }
323
324 bool IsBigFile(std::uint64_t offset = 0) const
325 {
326 return (fVersion >= kBigHeaderVersion) ||
327 (offset > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max()));
328 }
329
330 std::uint32_t GetSize() const
331 {
332 std::uint32_t sizeHead = sizeof(fMagic) + sizeof(fVersion) + sizeof(fBEGIN);
333 if (IsBigFile())
334 return sizeHead + sizeof(fInfoLong);
335 return sizeHead + sizeof(fInfoShort);
336 }
337
338 std::uint64_t GetEnd() const
339 {
340 if (IsBigFile())
341 return fInfoLong.fEND;
342 return fInfoShort.fEND;
343 }
344
345 void SetEnd(std::uint64_t value)
346 {
347 if (IsBigFile(value)) {
348 SetBigFile();
349 fInfoLong.fEND = value;
350 } else {
351 fInfoShort.fEND = value;
352 }
353 }
354
355 std::uint64_t GetSeekFree() const
356 {
357 if (IsBigFile())
358 return fInfoLong.fSeekFree;
359 return fInfoShort.fSeekFree;
360 }
361
362 void SetSeekFree(std::uint64_t value)
363 {
364 if (IsBigFile(value)) {
365 SetBigFile();
366 fInfoLong.fSeekFree = value;
367 } else {
368 fInfoShort.fSeekFree = value;
369 }
370 }
371
372 void SetNbytesFree(std::uint32_t value)
373 {
374 if (IsBigFile()) {
375 fInfoLong.fNbytesFree = value;
376 } else {
377 fInfoShort.fNbytesFree = value;
378 }
379 }
380
381 void SetNbytesName(std::uint32_t value)
382 {
383 if (IsBigFile()) {
384 fInfoLong.fNbytesName = value;
385 } else {
386 fInfoShort.fNbytesName = value;
387 }
388 }
389
390 std::uint64_t GetSeekInfo() const
391 {
392 if (IsBigFile())
393 return fInfoLong.fSeekInfo;
394 return fInfoShort.fSeekInfo;
395 }
396
397 void SetSeekInfo(std::uint64_t value)
398 {
399 if (IsBigFile(value)) {
400 SetBigFile();
401 fInfoLong.fSeekInfo = value;
402 } else {
403 fInfoShort.fSeekInfo = value;
404 }
405 }
406
407 std::uint64_t GetNbytesInfo() const
408 {
409 if (IsBigFile())
410 return fInfoLong.fNbytesInfo;
411 return fInfoShort.fNbytesInfo;
412 }
413
414 void SetNbytesInfo(std::uint32_t value)
415 {
416 if (IsBigFile()) {
417 fInfoLong.fNbytesInfo = value;
418 } else {
419 fInfoShort.fNbytesInfo = value;
420 }
421 }
422
423 void SetCompression(std::uint32_t value)
424 {
425 if (IsBigFile()) {
426 fInfoLong.fCompress = value;
427 } else {
428 fInfoShort.fCompress = value;
429 }
430 }
431};
432
433/// A reference to an unused byte-range in a TFile
434struct RTFFreeEntry {
435 static constexpr unsigned kBigFreeEntryVersion = 1000;
436
437 RUInt16BE fVersion{1};
438 union {
439 struct {
440 RUInt32BE fFirst{0};
441 RUInt32BE fLast{0};
442 } fInfoShort;
443 struct {
444 RUInt64BE fFirst{0};
445 RUInt64BE fLast{0};
446 } fInfoLong;
447 };
448
449 RTFFreeEntry() : fInfoShort() {}
450 void Set(std::uint64_t first, std::uint64_t last)
451 {
452 if (last > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max())) {
453 fVersion = fVersion + kBigFreeEntryVersion;
454 fInfoLong.fFirst = first;
455 fInfoLong.fLast = last;
456 } else {
457 fInfoShort.fFirst = first;
458 fInfoShort.fLast = last;
459 }
460 }
461 std::uint32_t GetSize() { return (fVersion >= kBigFreeEntryVersion) ? 18 : 10; }
462};
463
464/// The header of the directory key index
465struct RTFKeyList {
466 RUInt32BE fNKeys;
467 std::uint32_t GetSize() const { return sizeof(RTFKeyList); }
468 explicit RTFKeyList(std::uint32_t nKeys) : fNKeys(nKeys) {}
469};
470
471/// A streamed TDirectory (TFile) object
472struct RTFDirectory {
473 static constexpr unsigned kBigFileVersion = 1000;
474
475 RUInt16BE fClassVersion{5};
476 RTFDatetime fDateC;
477 RTFDatetime fDateM;
478 RUInt32BE fNBytesKeys{0};
479 RUInt32BE fNBytesName{0};
480 // The version of the key has to tell whether offsets are 32bit or 64bit long
481 union {
482 struct {
483 RUInt32BE fSeekDir{RTFHeader::kBEGIN};
484 RUInt32BE fSeekParent{0};
485 RUInt32BE fSeekKeys{0};
486 } fInfoShort;
487 struct {
488 RUInt64BE fSeekDir{RTFHeader::kBEGIN};
489 RUInt64BE fSeekParent{0};
490 RUInt64BE fSeekKeys{0};
491 } fInfoLong;
492 };
493
494 RTFDirectory() : fInfoShort() {}
495
496 // In case of a short TFile record (<2G), 3 padding ints are written after the UUID
497 std::uint32_t GetSize() const
498 {
499 if (fClassVersion >= kBigFileVersion)
500 return sizeof(RTFDirectory);
501 return 18 + sizeof(fInfoShort);
502 }
503
504 std::uint64_t GetSeekKeys() const
505 {
506 if (fClassVersion >= kBigFileVersion)
507 return fInfoLong.fSeekKeys;
508 return fInfoShort.fSeekKeys;
509 }
510
511 void SetSeekKeys(std::uint64_t seekKeys)
512 {
513 if (seekKeys > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max())) {
514 std::uint32_t seekDir = fInfoShort.fSeekDir;
515 std::uint32_t seekParent = fInfoShort.fSeekParent;
516 fInfoLong.fSeekDir = seekDir;
517 fInfoLong.fSeekParent = seekParent;
518 fInfoLong.fSeekKeys = seekKeys;
519 fClassVersion = fClassVersion + kBigFileVersion;
520 } else {
521 fInfoShort.fSeekKeys = seekKeys;
522 }
523 }
524};
525
526/// A zero UUID stored at the end of the TFile record
527struct RTFUUID {
528 RUInt16BE fVersionClass{1};
529 unsigned char fUUID[16];
530
531 RTFUUID()
532 {
533 TUUID uuid{TUUID::UUIDv4()};
534 char *buffer = reinterpret_cast<char *>(this);
535 uuid.FillBuffer(buffer);
536 assert(reinterpret_cast<RTFUUID *>(buffer) <= (this + 1));
537 }
538 std::uint32_t GetSize() const { return sizeof(RTFUUID); }
539};
540
541/// A streamed RNTuple class
542///
543/// NOTE: this must be kept in sync with RNTuple.hxx.
544/// Aside ensuring consistency between the two classes' members, you need to make sure
545/// that fVersionClass matches the class version of RNTuple.
546struct RTFNTuple {
547 RUInt32BE fByteCount{0x40000000 | (sizeof(RTFNTuple) - sizeof(fByteCount))};
548 RUInt16BE fVersionClass{2};
549 RUInt16BE fVersionEpoch{0};
550 RUInt16BE fVersionMajor{0};
551 RUInt16BE fVersionMinor{0};
552 RUInt16BE fVersionPatch{0};
553 RUInt64BE fSeekHeader{0};
554 RUInt64BE fNBytesHeader{0};
555 RUInt64BE fLenHeader{0};
556 RUInt64BE fSeekFooter{0};
557 RUInt64BE fNBytesFooter{0};
558 RUInt64BE fLenFooter{0};
559 RUInt64BE fMaxKeySize{0};
560
561 static constexpr std::uint32_t GetSizePlusChecksum() { return sizeof(RTFNTuple) + sizeof(std::uint64_t); }
562
563 RTFNTuple() = default;
564 explicit RTFNTuple(const ROOT::RNTuple &inMemoryAnchor)
565 {
566 fVersionEpoch = inMemoryAnchor.GetVersionEpoch();
567 fVersionMajor = inMemoryAnchor.GetVersionMajor();
568 fVersionMinor = inMemoryAnchor.GetVersionMinor();
569 fVersionPatch = inMemoryAnchor.GetVersionPatch();
570 fSeekHeader = inMemoryAnchor.GetSeekHeader();
571 fNBytesHeader = inMemoryAnchor.GetNBytesHeader();
572 fLenHeader = inMemoryAnchor.GetLenHeader();
573 fSeekFooter = inMemoryAnchor.GetSeekFooter();
574 fNBytesFooter = inMemoryAnchor.GetNBytesFooter();
575 fLenFooter = inMemoryAnchor.GetLenFooter();
576 fMaxKeySize = inMemoryAnchor.GetMaxKeySize();
577 }
578 std::uint32_t GetSize() const { return sizeof(RTFNTuple); }
579 // The byte count and class version members are not checksummed
580 std::uint32_t GetOffsetCkData() { return sizeof(fByteCount) + sizeof(fVersionClass); }
581 std::uint32_t GetSizeCkData() { return GetSize() - GetOffsetCkData(); }
582 unsigned char *GetPtrCkData() { return reinterpret_cast<unsigned char *>(this) + GetOffsetCkData(); }
583};
584
585/// The bare file global header
586struct RBareFileHeader {
587 char fMagic[7]{'r', 'n', 't', 'u', 'p', 'l', 'e'};
588 RUInt32BE fRootVersion{(ROOT_VERSION_CODE >> 16) * 10000 + ((ROOT_VERSION_CODE & 0xFF00) >> 8) * 100 +
589 (ROOT_VERSION_CODE & 0xFF)};
590 RUInt32BE fFormatVersion{1};
591 RUInt32BE fCompress{0};
592 RTFNTuple fNTuple;
593 // followed by the ntuple name
594};
595#pragma pack(pop)
596
/// The artificial class name shown for opaque RNTuple keys (see TBasket)
constexpr const char *kBlobClassName = "RBlob";
/// The class name under which the RNTuple anchor is stored
constexpr const char *kNTupleClassName = "ROOT::RNTuple";
601
602} // anonymous namespace
603
604namespace ROOT {
605namespace Internal {
606/// If a TFile container is written by a C stream (simple file), on dataset commit, the file header
607/// and the TFile record need to be updated
609 RTFHeader fHeader;
610 RTFDirectory fFileRecord;
611 std::uint64_t fSeekNTuple{0}; // Remember the offset for the keys list
612 std::uint64_t fSeekFileRecord{0};
613};
614
615/// The RKeyBlob writes an invisible key into a TFile. That is, a key that is not indexed in the list of keys,
616/// like a TBasket.
617/// NOTE: out of anonymous namespace because otherwise ClassDefInline fails to compile
618/// on some platforms.
619class RKeyBlob : public TKey {
620public:
621 RKeyBlob() = default;
622
623 explicit RKeyBlob(TFile *file) : TKey(file)
624 {
625 fClassName = kBlobClassName;
626 fVersion += RTFKey::kBigKeyVersion;
627 fKeylen = Sizeof();
628 }
629
630 /// Register a new key for a data record of size nbytes
631 void Reserve(size_t nbytes, std::uint64_t *seekKey)
632 {
633 Create(nbytes);
634 *seekKey = fSeekKey;
635 }
636
637 bool WasAllocatedInAFreeSlot() const { return fLeft > 0; }
638
640};
641
642} // namespace Internal
643} // namespace ROOT
644
645// Computes how many chunks do we need to fit `nbytes` of payload, considering that the
646// first chunk also needs to house the offsets of the other chunks and no chunk can
647// be bigger than `maxChunkSize`. When saved to a TFile, each chunk is part of a separate TKey.
648static size_t ComputeNumChunks(size_t nbytes, size_t maxChunkSize)
649{
650 constexpr size_t kChunkOffsetSize = sizeof(std::uint64_t);
651
652 assert(nbytes > maxChunkSize);
653 size_t nChunks = (nbytes + maxChunkSize - 1) / maxChunkSize;
654 assert(nChunks > 1);
655 size_t nbytesTail = nbytes % maxChunkSize;
656 size_t nbytesExtra = (nbytesTail > 0) * (maxChunkSize - nbytesTail);
657 size_t nbytesChunkOffsets = (nChunks - 1) * kChunkOffsetSize;
658 if (nbytesChunkOffsets > nbytesExtra) {
659 ++nChunks;
660 nbytesChunkOffsets += kChunkOffsetSize;
661 }
662
663 // We don't support having more chunkOffsets than what fits in one chunk.
664 // For a reasonable-sized maxKeySize it looks very unlikely that we can have more chunks
665 // than we can fit in the first `maxKeySize` bytes. E.g. for maxKeySize = 1GiB we can fit
666 // 134217728 chunk offsets, making our multi-key blob's capacity exactly 128 PiB.
667 R__ASSERT(nbytesChunkOffsets <= maxChunkSize);
668
669 return nChunks;
670}
671
673
675{
676 char ident[4];
677 ReadBuffer(ident, 4, 0);
678 if (std::string(ident, 4) == "root")
679 return GetNTupleProper(ntupleName);
680 fIsBare = true;
681 return GetNTupleBare(ntupleName);
682}
683
684/// Searches for a key with the given name and type in the key index of the given directory.
685/// Return 0 if the key was not found.
686std::uint64_t ROOT::Internal::RMiniFileReader::SearchInDirectory(std::uint64_t &offsetDir, std::string_view keyName,
687 std::string_view typeName)
688{
689 RTFDirectory directory;
690 ReadBuffer(&directory, sizeof(directory), offsetDir);
691
692 RTFKey key;
693 RUInt32BE nKeys;
694 std::uint64_t offset = directory.GetSeekKeys();
695 ReadBuffer(&key, sizeof(key), offset);
696 offset += key.fKeyLen;
697 ReadBuffer(&nKeys, sizeof(nKeys), offset);
698 offset += sizeof(nKeys);
699
700 for (unsigned int i = 0; i < nKeys; ++i) {
701 ReadBuffer(&key, sizeof(key), offset);
702 auto offsetNextKey = offset + key.fKeyLen;
703
704 offset += key.GetHeaderSize();
705 RTFString name;
706 ReadBuffer(&name, 1, offset);
707 ReadBuffer(&name, name.GetSize(), offset);
708 if (std::string_view(name.fData, name.fLName) != typeName) {
709 offset = offsetNextKey;
710 continue;
711 }
712 offset += name.GetSize();
713 ReadBuffer(&name, 1, offset);
714 ReadBuffer(&name, name.GetSize(), offset);
715 if (std::string_view(name.fData, name.fLName) == keyName) {
716 return key.GetSeekKey();
717 }
718 offset = offsetNextKey;
719 }
720
721 // Not found
722 return 0;
723}
724
726{
727 RTFHeader fileHeader;
728 ReadBuffer(&fileHeader, sizeof(fileHeader), 0);
729
730 const std::uint64_t seekKeyInfo = fileHeader.GetSeekInfo();
731
732 RTFKey key;
733 ReadBuffer(&key, sizeof(key), seekKeyInfo);
734
735 const std::uint64_t nbytesInfo = fileHeader.GetNbytesInfo() - key.fKeyLen;
736 const std::uint64_t seekInfo = seekKeyInfo + key.fKeyLen;
737 const std::uint32_t uncompLenInfo = key.fObjLen;
738 auto streamerInfo = MakeUninitArray<char>(uncompLenInfo);
739 if (nbytesInfo == uncompLenInfo) {
740 // Uncompressed
741 ReadBuffer(streamerInfo.get(), nbytesInfo, seekInfo);
742 } else {
743 auto buffer = MakeUninitArray<std::byte>(nbytesInfo);
744 ReadBuffer(buffer.get(), nbytesInfo, seekInfo);
745 RNTupleDecompressor::Unzip(buffer.get(), nbytesInfo, uncompLenInfo, streamerInfo.get());
746 }
747
748 TBufferFile buffer(TBuffer::kRead, uncompLenInfo, streamerInfo.release());
749 // This is necessary to allow the "class tags" inside the StreamerInfo list to refer to the proper offset into
750 // the buffer. Normally TFile loads the StreamerInfo via TKey::ReadObjWithBuffer, whose buffer also includes the
751 // key itself. Since we dealt with the key above already, we are only passing the payload to TBufferFile so offsets
752 // need to be patched up.
753 buffer.SetBufferDisplacement(key.fKeyLen);
754 TList streamerInfoList;
755 streamerInfoList.Streamer(buffer);
756 TObjLink *lnk = streamerInfoList.FirstLink();
757 while (lnk) {
758 auto obj = lnk->GetObject();
759 // NOTE: the last element of the streamer info list may be a TList with the IO customization rules, so we need
760 // to check before static casting.
761 if (obj->IsA() == TStreamerInfo::Class()) {
762 auto info = static_cast<TStreamerInfo *>(obj);
763 info->BuildCheck();
764 }
765 lnk = lnk->Next();
766 }
767}
768
770{
771 RTFHeader fileHeader;
772 ReadBuffer(&fileHeader, sizeof(fileHeader), 0);
773
774 RTFKey key;
775 RTFString name;
776 ReadBuffer(&key, sizeof(key), fileHeader.fBEGIN);
777 // Skip over the entire key length, including the class name, object name, and title stored in it.
778 std::uint64_t offset = fileHeader.fBEGIN + key.fKeyLen;
779 // Skip over the name and title of the TNamed preceding the TFile (root TDirectory) entry.
780 ReadBuffer(&name, 1, offset);
781 offset += name.GetSize();
782 ReadBuffer(&name, 1, offset);
783 offset += name.GetSize();
784
785 // split ntupleName by '/' character to open datasets in subdirectories.
786 std::string ntuplePathTail(ntuplePath);
787 if (!ntuplePathTail.empty() && ntuplePathTail[0] == '/')
788 ntuplePathTail = ntuplePathTail.substr(1);
789 auto pos = std::string::npos;
790 while ((pos = ntuplePathTail.find('/')) != std::string::npos) {
791 auto directoryName = ntuplePathTail.substr(0, pos);
792 ntuplePathTail.erase(0, pos + 1);
793
794 offset = SearchInDirectory(offset, directoryName, "TDirectory");
795 if (offset == 0) {
796 return R__FAIL("no directory named '" + std::string(directoryName) + "' in file '" + fRawFile->GetUrl() + "'");
797 }
798 ReadBuffer(&key, sizeof(key), offset);
799 offset = key.GetSeekKey() + key.fKeyLen;
800 }
801 // no more '/' delimiter in ntuplePath
802 auto ntupleName = ntuplePathTail;
803
804 offset = SearchInDirectory(offset, ntupleName, kNTupleClassName);
805 if (offset == 0) {
806 return R__FAIL("no RNTuple named '" + std::string(ntupleName) + "' in file '" + fRawFile->GetUrl() + "'");
807 }
808
809 ReadBuffer(&key, sizeof(key), offset);
810 offset = key.GetSeekKey() + key.fKeyLen;
811
812 // size of a RTFNTuple version 2 (min supported version); future anchor versions can grow.
813 constexpr size_t kMinNTupleSize = 78;
814 static_assert(kMinNTupleSize == RTFNTuple::GetSizePlusChecksum());
815 if (key.fObjLen < kMinNTupleSize) {
816 return R__FAIL("invalid anchor size: " + std::to_string(key.fObjLen) + " < " + std::to_string(sizeof(RTFNTuple)));
817 }
818
819 const auto objNbytes = key.GetSize() - key.fKeyLen;
820 auto res = GetNTupleProperAtOffset(offset, objNbytes, key.fObjLen);
821 return res;
822}
823
825 std::uint64_t compSize,
826 std::uint64_t uncompLen)
827{
828 // The object length can be smaller than the size of RTFNTuple if it comes from a past RNTuple class version,
829 // or larger than it if it comes from a future RNTuple class version.
830 auto bufAnchor = MakeUninitArray<unsigned char>(std::max<size_t>(uncompLen, sizeof(RTFNTuple)));
831 RTFNTuple *ntuple = new (bufAnchor.get()) RTFNTuple;
832
833 if (compSize != uncompLen) {
834 // Read into a temporary buffer
835 auto unzipBuf = MakeUninitArray<unsigned char>(std::max<size_t>(uncompLen, sizeof(RTFNTuple)));
836 ReadBuffer(unzipBuf.get(), compSize, payloadOffset);
837 // Unzip into the final buffer
838 RNTupleDecompressor::Unzip(unzipBuf.get(), compSize, uncompLen, ntuple);
839 } else {
840 ReadBuffer(ntuple, compSize, payloadOffset);
841 }
842
843 // We require that future class versions only append members and store the checksum in the last 8 bytes
844 // Checksum calculation: strip byte count, class version, fChecksum member
845 const auto lenCkData = uncompLen - ntuple->GetOffsetCkData() - sizeof(uint64_t);
846 const auto ckCalc = XXH3_64bits(ntuple->GetPtrCkData(), lenCkData);
847 uint64_t ckOnDisk;
848
849 RUInt64BE *ckOnDiskPtr = reinterpret_cast<RUInt64BE *>(bufAnchor.get() + uncompLen - sizeof(uint64_t));
850 ckOnDisk = static_cast<uint64_t>(*ckOnDiskPtr);
851 if (ckCalc != ckOnDisk) {
852 return R__FAIL("RNTuple anchor checksum mismatch");
853 }
854
855 return CreateAnchor(ntuple->fVersionEpoch, ntuple->fVersionMajor, ntuple->fVersionMinor, ntuple->fVersionPatch,
856 ntuple->fSeekHeader, ntuple->fNBytesHeader, ntuple->fLenHeader, ntuple->fSeekFooter,
857 ntuple->fNBytesFooter, ntuple->fLenFooter, ntuple->fMaxKeySize);
858}
859
861{
862 RBareFileHeader fileHeader;
863 ReadBuffer(&fileHeader, sizeof(fileHeader), 0);
864 RTFString name;
865 auto offset = sizeof(fileHeader);
866 ReadBuffer(&name, 1, offset);
867 ReadBuffer(&name, name.GetSize(), offset);
868 std::string_view foundName(name.fData, name.fLName);
869 if (foundName != ntupleName) {
870 return R__FAIL("expected RNTuple named '" + std::string(ntupleName) + "' but instead found '" +
871 std::string(foundName) + "' in file '" + fRawFile->GetUrl() + "'");
872 }
873 offset += name.GetSize();
874
875 RTFNTuple ntuple;
876 ReadBuffer(&ntuple, sizeof(ntuple), offset);
877 std::uint64_t onDiskChecksum;
878 ReadBuffer(&onDiskChecksum, sizeof(onDiskChecksum), offset + sizeof(ntuple));
879 auto checksum = XXH3_64bits(ntuple.GetPtrCkData(), ntuple.GetSizeCkData());
880 if (checksum != static_cast<uint64_t>(onDiskChecksum))
881 return R__FAIL("RNTuple bare file: anchor checksum mismatch");
882
883 return CreateAnchor(ntuple.fVersionEpoch, ntuple.fVersionMajor, ntuple.fVersionMinor, ntuple.fVersionPatch,
884 ntuple.fSeekHeader, ntuple.fNBytesHeader, ntuple.fLenHeader, ntuple.fSeekFooter,
885 ntuple.fNBytesFooter, ntuple.fLenFooter, ntuple.fMaxKeySize);
886}
887
888void ROOT::Internal::RMiniFileReader::ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset)
889{
890 TryReadBuffer(buffer, nbytes, offset).ThrowOnError();
891}
892
893ROOT::RResult<void> ROOT::Internal::RMiniFileReader::TryReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset)
894{
895 const auto ByteReadErr = [](std::size_t expected, std::size_t nread) {
896 return R__FAIL("invalid read (expected bytes: " + std::to_string(expected) + ", read: " + std::to_string(nread) +
897 ")");
898 };
899
900 size_t nread;
901 if (fMaxKeySize == 0 || nbytes <= fMaxKeySize) {
902 // Fast path: read single blob
903 nread = fRawFile->ReadAt(buffer, nbytes, offset);
904 } else {
905 // Read chunked blob. See RNTupleFileWriter::WriteBlob() for details.
906 const size_t nChunks = ComputeNumChunks(nbytes, fMaxKeySize);
907 const size_t nbytesChunkOffsets = (nChunks - 1) * sizeof(std::uint64_t);
908 const size_t nbytesFirstChunk = fMaxKeySize - nbytesChunkOffsets;
909 uint8_t *bufCur = reinterpret_cast<uint8_t *>(buffer);
910
911 // Read first chunk
912 nread = fRawFile->ReadAt(bufCur, fMaxKeySize, offset);
913 if (nread != fMaxKeySize)
914 return ByteReadErr(fMaxKeySize, nread);
915
916 // NOTE: we read the entire chunk in `bufCur`, but we only advance the pointer by `nbytesFirstChunk`,
917 // since the last part of `bufCur` will later be overwritten by the next chunk's payload.
918 // We do this to avoid a second ReadAt to read in the chunk offsets.
919 bufCur += nbytesFirstChunk;
920 nread -= nbytesChunkOffsets;
921
922 const auto chunkOffsets = MakeUninitArray<std::uint64_t>(nChunks - 1);
923 memcpy(chunkOffsets.get(), bufCur, nbytesChunkOffsets);
924
925 size_t remainingBytes = nbytes - nbytesFirstChunk;
926 std::uint64_t *curChunkOffset = &chunkOffsets[0];
927
928 do {
929 std::uint64_t chunkOffset;
930 RNTupleSerializer::DeserializeUInt64(curChunkOffset, chunkOffset);
931 ++curChunkOffset;
932
933 const size_t bytesToRead = std::min<size_t>(fMaxKeySize, remainingBytes);
934 // Ensure we don't read outside of the buffer
935 R__ASSERT(static_cast<size_t>(bufCur - reinterpret_cast<uint8_t *>(buffer)) <= nbytes - bytesToRead);
936
937 auto nbytesRead = fRawFile->ReadAt(bufCur, bytesToRead, chunkOffset);
938 if (nbytesRead != bytesToRead)
939 return ByteReadErr(bytesToRead, nbytesRead);
940
941 nread += bytesToRead;
942 bufCur += bytesToRead;
943 remainingBytes -= bytesToRead;
944 } while (remainingBytes > 0);
945 }
946
947 if (nread != nbytes)
948 return ByteReadErr(nbytes, nread);
949
950 return RResult<void>::Success();
951}
952
953////////////////////////////////////////////////////////////////////////////////
954
955/// Prepare a blob key in the provided buffer, which must provide space for kBlobKeyLen bytes. Note that the array type
956/// is purely documentation, the argument is actually just a pointer.
957void ROOT::Internal::RNTupleFileWriter::PrepareBlobKey(std::int64_t offset, size_t nbytes, size_t len,
958 unsigned char buffer[kBlobKeyLen])
959{
960 RTFString strClass{kBlobClassName};
961 RTFString strObject;
962 RTFString strTitle;
963 RTFKey keyHeader(offset, RTFHeader::kBEGIN, strClass, strObject, strTitle, len, nbytes);
964 R__ASSERT(keyHeader.fKeyLen == kBlobKeyLen);
965
966 // Copy structures into the buffer.
967 unsigned char *writeBuffer = buffer;
968 memcpy(writeBuffer, &keyHeader, keyHeader.GetHeaderSize());
969 writeBuffer += keyHeader.GetHeaderSize();
970 memcpy(writeBuffer, &strClass, strClass.GetSize());
971 writeBuffer += strClass.GetSize();
972 memcpy(writeBuffer, &strObject, strObject.GetSize());
973 writeBuffer += strObject.GetSize();
974 memcpy(writeBuffer, &strTitle, strTitle.GetSize());
975 writeBuffer += strTitle.GetSize();
976 R__ASSERT(writeBuffer == buffer + kBlobKeyLen);
977}
978
979////////////////////////////////////////////////////////////////////////////////
980
982
984{
985 static_assert(kHeaderBlockSize % kBlockAlign == 0, "invalid header block size");
986 if (bufferSize % kBlockAlign != 0)
987 throw RException(R__FAIL("Buffer size not a multiple of alignment: " + std::to_string(bufferSize)));
988 fBlockSize = bufferSize;
989
990 std::align_val_t blockAlign{kBlockAlign};
991 fHeaderBlock = static_cast<unsigned char *>(::operator new[](kHeaderBlockSize, blockAlign));
992 memset(fHeaderBlock, 0, kHeaderBlockSize);
993 fBlock = static_cast<unsigned char *>(::operator new[](fBlockSize, blockAlign));
994 memset(fBlock, 0, fBlockSize);
995}
996
998{
999 if (fFile)
1000 fclose(fFile);
1001
1002 std::align_val_t blockAlign{kBlockAlign};
1003 if (fHeaderBlock)
1004 ::operator delete[](fHeaderBlock, blockAlign);
1005 if (fBlock)
1006 ::operator delete[](fBlock, blockAlign);
1007}
1008
namespace {
/// Positions `stream` at the 64-bit `offset`, interpreted relative to `origin` (SEEK_SET, SEEK_CUR or SEEK_END).
/// Returns 0 on success and a non-zero value on failure, following the fseek() convention; on failure errno
/// describes the error.
int FSeek64(FILE *stream, std::int64_t offset, int origin)
{
#ifdef R__SEEK64
   return fseeko64(stream, offset, origin);
#else
   // fseek() takes a long, which is narrower than 64 bits on some platforms. Refuse offsets that do not survive
   // the conversion instead of silently seeking to a truncated (i.e. wrong) file position.
   const long narrowOffset = static_cast<long>(offset);
   if (static_cast<std::int64_t>(narrowOffset) != offset) {
      errno = EOVERFLOW;
      return -1;
   }
   return fseek(stream, narrowOffset, origin);
#endif
}
} // namespace
1019
1021{
1022 // Write the last partially filled block, which may still need appropriate alignment for Direct I/O.
1023 // If it is the first block, get the updated header block.
1024 if (fBlockOffset == 0) {
1025 std::size_t headerBlockSize = kHeaderBlockSize;
1026 if (headerBlockSize > fFilePos) {
1027 headerBlockSize = fFilePos;
1028 }
1029 memcpy(fBlock, fHeaderBlock, headerBlockSize);
1030 }
1031
1032 std::size_t retval = FSeek64(fFile, fBlockOffset, SEEK_SET);
1033 if (retval)
1034 throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno)));
1035
1036 std::size_t lastBlockSize = fFilePos - fBlockOffset;
1037 R__ASSERT(lastBlockSize <= fBlockSize);
1038 if (fDirectIO) {
1039 // Round up to a multiple of kBlockAlign.
1040 lastBlockSize += kBlockAlign - 1;
1041 lastBlockSize = (lastBlockSize / kBlockAlign) * kBlockAlign;
1042 R__ASSERT(lastBlockSize <= fBlockSize);
1043 }
1044 retval = fwrite(fBlock, 1, lastBlockSize, fFile);
1045 if (retval != lastBlockSize)
1046 throw RException(R__FAIL(std::string("write failed: ") + strerror(errno)));
1047
1048 // Write the (updated) header block, unless it was part of the write above.
1049 if (fBlockOffset > 0) {
1050 retval = FSeek64(fFile, 0, SEEK_SET);
1051 if (retval)
1052 throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno)));
1053
1054 retval = fwrite(fHeaderBlock, 1, kHeaderBlockSize, fFile);
1055 if (retval != RImplSimple::kHeaderBlockSize)
1056 throw RException(R__FAIL(std::string("write failed: ") + strerror(errno)));
1057 }
1058
1059 retval = fflush(fFile);
1060 if (retval)
1061 throw RException(R__FAIL(std::string("Flush failed: ") + strerror(errno)));
1062}
1063
1064void ROOT::Internal::RNTupleFileWriter::RImplSimple::Write(const void *buffer, size_t nbytes, std::int64_t offset)
1065{
1067 size_t retval;
1068 if ((offset >= 0) && (static_cast<std::uint64_t>(offset) != fFilePos)) {
1069 fFilePos = offset;
1070 }
1071
1072 // Keep header block to overwrite on commit.
1073 if (fFilePos < kHeaderBlockSize) {
1074 std::size_t headerBytes = nbytes;
1075 if (fFilePos + headerBytes > kHeaderBlockSize) {
1076 headerBytes = kHeaderBlockSize - fFilePos;
1077 }
1078 memcpy(fHeaderBlock + fFilePos, buffer, headerBytes);
1079 }
1080
1082
1083 while (nbytes > 0) {
1084 std::uint64_t posInBlock = fFilePos % fBlockSize;
1085 std::uint64_t blockOffset = fFilePos - posInBlock;
1086 if (blockOffset != fBlockOffset) {
1087 // Write the block.
1088 retval = FSeek64(fFile, fBlockOffset, SEEK_SET);
1089 if (retval)
1090 throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno)));
1091
1092 retval = fwrite(fBlock, 1, fBlockSize, fFile);
1093 if (retval != fBlockSize)
1094 throw RException(R__FAIL(std::string("write failed: ") + strerror(errno)));
1095
1096 // Null the buffer contents for good measure.
1097 memset(fBlock, 0, fBlockSize);
1098 }
1099
1100 fBlockOffset = blockOffset;
1101 std::size_t blockSize = nbytes;
1102 if (blockSize > fBlockSize - posInBlock) {
1103 blockSize = fBlockSize - posInBlock;
1104 }
1105 memcpy(fBlock + posInBlock, buffer, blockSize);
1106 buffer = static_cast<const unsigned char *>(buffer) + blockSize;
1107 nbytes -= blockSize;
1108 fFilePos += blockSize;
1109 }
1110}
1111
1112std::uint64_t
1113ROOT::Internal::RNTupleFileWriter::RImplSimple::WriteKey(const void *buffer, std::size_t nbytes, std::size_t len,
1114 std::int64_t offset, std::uint64_t directoryOffset,
1115 const std::string &className, const std::string &objectName,
1116 const std::string &title)
1117{
1118 if (offset > 0)
1119 fKeyOffset = offset;
1120 RTFString strClass{className};
1121 RTFString strObject{objectName};
1122 RTFString strTitle{title};
1123
1124 RTFKey key(fKeyOffset, directoryOffset, strClass, strObject, strTitle, len, nbytes);
1125 Write(&key, key.GetHeaderSize(), fKeyOffset);
1126 Write(&strClass, strClass.GetSize());
1127 Write(&strObject, strObject.GetSize());
1128 Write(&strTitle, strTitle.GetSize());
1129 auto offsetData = fFilePos;
1130 // The next key starts after the data.
1131 fKeyOffset = offsetData + nbytes;
1132 if (buffer)
1133 Write(buffer, nbytes);
1134
1135 return offsetData;
1136}
1137
1138std::uint64_t ROOT::Internal::RNTupleFileWriter::RImplSimple::ReserveBlobKey(std::size_t nbytes, std::size_t len,
1139 unsigned char keyBuffer[kBlobKeyLen])
1140{
1141 if (keyBuffer) {
1142 PrepareBlobKey(fKeyOffset, nbytes, len, keyBuffer);
1143 } else {
1144 unsigned char localKeyBuffer[kBlobKeyLen];
1145 PrepareBlobKey(fKeyOffset, nbytes, len, localKeyBuffer);
1146 Write(localKeyBuffer, kBlobKeyLen, fKeyOffset);
1147 }
1148
1149 auto offsetData = fKeyOffset + kBlobKeyLen;
1150 // The next key starts after the data.
1151 fKeyOffset = offsetData + nbytes;
1152
1153 return offsetData;
1154}
1155
1156////////////////////////////////////////////////////////////////////////////////
1157
1158template <typename T>
1159std::uint64_t ROOT::Internal::RNTupleFileWriter::ReserveBlobKey(T &caller, TFile &file, std::size_t nbytes,
1160 std::size_t len, unsigned char keyBuffer[kBlobKeyLen])
1161{
1162 std::uint64_t offsetKey;
1163 ROOT::Internal::RKeyBlob keyBlob(&file);
1164 // Since it is unknown beforehand if offsetKey is beyond the 2GB limit or not,
1165 // RKeyBlob will always reserve space for a big key (version >= 1000)
1166 keyBlob.Reserve(nbytes, &offsetKey);
1167
1168 if (keyBuffer) {
1169 PrepareBlobKey(offsetKey, nbytes, len, keyBuffer);
1170 } else {
1171 unsigned char localKeyBuffer[kBlobKeyLen];
1172 PrepareBlobKey(offsetKey, nbytes, len, localKeyBuffer);
1173 caller.Write(localKeyBuffer, kBlobKeyLen, offsetKey);
1174 }
1175
1176 if (keyBlob.WasAllocatedInAFreeSlot()) {
1177 // If the key was allocated in a free slot, the last 4 bytes of its buffer contain the new size
1178 // of the remaining free slot and we need to write it to disk before the key gets destroyed at the end of the
1179 // function.
1180 caller.Write(keyBlob.GetBuffer() + nbytes, sizeof(Int_t), offsetKey + kBlobKeyLen + nbytes);
1181 }
1182
1183 auto offsetData = offsetKey + kBlobKeyLen;
1184
1185 return offsetData;
1186}
1187
1188void ROOT::Internal::RNTupleFileWriter::RImplTFile::Write(const void *buffer, size_t nbytes, std::int64_t offset)
1189{
1190 fDirectory->GetFile()->Seek(offset);
1191 bool rv = fDirectory->GetFile()->WriteBuffer((char *)(buffer), nbytes);
1192 if (rv)
1193 throw RException(R__FAIL("WriteBuffer failed."));
1194}
1195
1197 unsigned char keyBuffer[kBlobKeyLen])
1198{
1199 auto offsetData = RNTupleFileWriter::ReserveBlobKey(*this, *fDirectory->GetFile(), nbytes, len, keyBuffer);
1200 return offsetData;
1201}
1202
1203////////////////////////////////////////////////////////////////////////////////
1204
1205void ROOT::Internal::RNTupleFileWriter::RImplRFile::Write(const void *buffer, size_t nbytes, std::int64_t offset)
1206{
1208 file->Seek(offset);
1209 bool rv = file->WriteBuffer((char *)(buffer), nbytes);
1210 if (rv)
1211 throw RException(R__FAIL("WriteBuffer failed."));
1212}
1213
1215 unsigned char keyBuffer[kBlobKeyLen])
1216{
1218 auto offsetData = RNTupleFileWriter::ReserveBlobKey(*this, *file, nbytes, len, keyBuffer);
1219 return offsetData;
1220}
1221
1222////////////////////////////////////////////////////////////////////////////////
1223
1224ROOT::Internal::RNTupleFileWriter::RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize, bool hidden)
1225 : fIsHidden(hidden), fNTupleName(name)
1226{
1227 auto &fileSimple = fFile.emplace<RImplSimple>();
1228 fileSimple.fControlBlock = std::make_unique<ROOT::Internal::RTFileControlBlock>();
1229 fNTupleAnchor.fMaxKeySize = maxKeySize;
1230 auto infoRNTuple = RNTuple::Class()->GetStreamerInfo();
1231 fStreamerInfoMap[infoRNTuple->GetNumber()] = infoRNTuple;
1232}
1233
1235
1236std::unique_ptr<ROOT::Internal::RNTupleFileWriter>
1237ROOT::Internal::RNTupleFileWriter::Recreate(std::string_view ntupleName, std::string_view path,
1238 EContainerFormat containerFormat, const ROOT::RNTupleWriteOptions &options)
1239{
1240 std::string fileName(path);
1241 size_t idxDirSep = fileName.find_last_of("\\/");
1242 if (idxDirSep != std::string::npos) {
1243 fileName.erase(0, idxDirSep + 1);
1244 }
1245#ifdef R__LINUX
1246 int flags = O_WRONLY | O_CREAT | O_TRUNC;
1247#ifdef O_LARGEFILE
1248 // Add the equivalent flag that is passed by fopen64.
1249 flags |= O_LARGEFILE;
1250#endif
1251 if (options.GetUseDirectIO()) {
1252 flags |= O_DIRECT;
1253 }
1254 int fd = open(std::string(path).c_str(), flags, 0666);
1255 if (fd == -1) {
1256 throw RException(R__FAIL(std::string("open failed for file \"") + std::string(path) + "\": " + strerror(errno)));
1257 }
1258 FILE *fileStream = fdopen(fd, "wb");
1259#else
1260#ifdef R__SEEK64
1261 FILE *fileStream = fopen64(std::string(path.data(), path.size()).c_str(), "wb");
1262#else
1263 FILE *fileStream = fopen(std::string(path.data(), path.size()).c_str(), "wb");
1264#endif
1265#endif
1266 if (!fileStream) {
1267 throw RException(R__FAIL(std::string("open failed for file \"") + std::string(path) + "\": " + strerror(errno)));
1268 }
1269 // RNTupleFileWriter::RImplSimple does its own buffering, turn off additional buffering from C stdio.
1270 std::setvbuf(fileStream, nullptr, _IONBF, 0);
1271
1272 auto writer =
1273 std::unique_ptr<RNTupleFileWriter>(new RNTupleFileWriter(ntupleName, options.GetMaxKeySize(), /*hidden=*/false));
1274 RImplSimple &fileSimple = std::get<RImplSimple>(writer->fFile);
1275 fileSimple.fFile = fileStream;
1276 fileSimple.fDirectIO = options.GetUseDirectIO();
1277 fileSimple.AllocateBuffers(options.GetWriteBufferSize());
1278 writer->fFileName = fileName;
1279
1280 int defaultCompression = options.GetCompression();
1281 switch (containerFormat) {
1282 case EContainerFormat::kTFile: writer->WriteTFileSkeleton(defaultCompression); break;
1284 writer->fIsBare = true;
1285 writer->WriteBareFileSkeleton(defaultCompression);
1286 break;
1287 default: R__ASSERT(false && "Internal error: unhandled container format");
1288 }
1289
1290 return writer;
1291}
1292
1293std::unique_ptr<ROOT::Internal::RNTupleFileWriter>
1294ROOT::Internal::RNTupleFileWriter::Append(std::string_view ntupleName, TDirectory &fileOrDirectory,
1295 std::uint64_t maxKeySize, bool hidden)
1296{
1297 TFile *file = fileOrDirectory.GetFile();
1298 if (!file)
1299 throw RException(R__FAIL("invalid attempt to add an RNTuple to a directory that is not backed by a file"));
1300 assert(file->IsBinary());
1301
1302 auto writer = std::unique_ptr<RNTupleFileWriter>(new RNTupleFileWriter(ntupleName, maxKeySize, hidden));
1303 auto &fileProper = writer->fFile.emplace<RImplTFile>();
1304 fileProper.fDirectory = &fileOrDirectory;
1305 return writer;
1306}
1307
1308std::unique_ptr<ROOT::Internal::RNTupleFileWriter>
1310 std::string_view ntupleDir, std::uint64_t maxKeySize)
1311{
1312 auto writer = std::unique_ptr<RNTupleFileWriter>(new RNTupleFileWriter(ntupleName, maxKeySize, /*hidden=*/false));
1313 auto &rfile = writer->fFile.emplace<RImplRFile>();
1314 rfile.fFile = &file;
1315 R__ASSERT(ntupleDir.empty() || ntupleDir[ntupleDir.size() - 1] == '/');
1316 rfile.fDir = ntupleDir;
1317 return writer;
1318}
1319
1320std::unique_ptr<ROOT::Internal::RNTupleFileWriter>
1322{
1323 if (auto *file = std::get_if<RImplTFile>(&fFile)) {
1324 return Append(ntupleName, *file->fDirectory, fNTupleAnchor.fMaxKeySize, /* hidden= */ true);
1325 }
1326 // TODO: support also non-TFile-based writers
1327 throw ROOT::RException(R__FAIL("cannot clone a non-TFile-based RNTupleFileWriter."));
1328}
1329
1331{
1332 RImplSimple *fileSimple = std::get_if<RImplSimple>(&fFile);
1333 if (!fileSimple)
1334 throw RException(R__FAIL("invalid attempt to seek non-simple writer"));
1335
1336 fileSimple->fFilePos = offset;
1337 fileSimple->fKeyOffset = offset;
1338 // The next Write() will Flush() if necessary.
1339}
1340
1342{
1343 fStreamerInfoMap.insert(streamerInfos.cbegin(), streamerInfos.cend());
1344}
1345
1347{
1348 const auto WriteStreamerInfoToFile = [&](TFile *file) {
1349 // Make sure the streamer info records used in the RNTuple are written to the file
1351 buf.SetParent(file);
1352 for (auto [_, info] : fStreamerInfoMap)
1353 buf.TagStreamerInfo(info);
1354 };
1355
1356 ROOT::Internal::RNTupleLink anchorInfo;
1357 // NOTE: checksum length is included in the uncompressed len
1358 anchorInfo.fLength = RTFNTuple{}.GetSize() + sizeof(std::uint64_t);
1360
1361 if (auto fileProper = std::get_if<RImplTFile>(&fFile)) {
1362 // Easy case, the ROOT file header and the RNTuple streaming is taken care of by TFile
1363 fileProper->fDirectory->WriteObject(&fNTupleAnchor, fNTupleName.c_str());
1364 WriteStreamerInfoToFile(fileProper->fDirectory->GetFile());
1365 auto key = static_cast<TKey *>(fileProper->fDirectory->GetListOfKeys()->FindObject(fNTupleName.c_str()));
1366 R__ASSERT(key);
1367 anchorInfo.fLocator.SetPosition(key->GetSeekKey() + key->GetKeylen());
1368 anchorInfo.fLocator.SetNBytesOnStorage(key->GetNbytes() - key->GetKeylen());
1369 // NOTE: this must happen after FindObject(), otherwise some TFile implementations, such as TBufferMergerFile,
1370 // may reset the keys list upon write.
1371 fileProper->fDirectory->GetFile()->Write();
1372
1373 if (fIsHidden) {
1374 // Remove the anchor's key from the directory's KeysList to disallow retrieving directly the
1375 // attribute RNTuple from the TFile.
1376 fileProper->fDirectory->GetListOfKeys()->Remove(key);
1377 }
1378 } else if (auto fileRFile = std::get_if<RImplRFile>(&fFile)) {
1379 // Same as the case above but handled via RFile
1380 fileRFile->fFile->Put(fileRFile->fDir + fNTupleName, fNTupleAnchor);
1381 WriteStreamerInfoToFile(ROOT::Experimental::Internal::GetRFileTFile(*fileRFile->fFile));
1382 auto key = fileRFile->fFile->GetKeyInfo(fNTupleName);
1383 R__ASSERT(key);
1384 anchorInfo.fLocator.SetPosition(key->GetSeekKey() + key->GetNBytesKey());
1385 anchorInfo.fLocator.SetNBytesOnStorage(key->GetNBytesObj());
1386 fileRFile->fFile->Flush();
1387 } else {
1388 // Writing by C file stream: prepare the container format header and stream the RNTuple anchor object
1389 auto &fileSimple = std::get<RImplSimple>(fFile);
1390
1391 RTFNTuple ntupleOnDisk(fNTupleAnchor);
1392 anchorInfo.fLocator.SetPosition(fileSimple.fControlBlock->fSeekNTuple);
1393
1394 if (fIsBare) {
1395 // Compute the checksum
1396 std::uint64_t checksum = XXH3_64bits(ntupleOnDisk.GetPtrCkData(), ntupleOnDisk.GetSizeCkData());
1397 memcpy(fileSimple.fHeaderBlock + fileSimple.fControlBlock->fSeekNTuple, &ntupleOnDisk, ntupleOnDisk.GetSize());
1398 memcpy(fileSimple.fHeaderBlock + fileSimple.fControlBlock->fSeekNTuple + ntupleOnDisk.GetSize(), &checksum,
1399 sizeof(checksum));
1400 fileSimple.Flush();
1401
1402 anchorInfo.fLocator.SetNBytesOnStorage(ntupleOnDisk.GetSize());
1403 } else {
1404 auto anchorSize = WriteTFileNTupleKey(compression);
1405 WriteTFileKeysList(anchorSize); // NOTE: this is written uncompressed
1406 WriteTFileStreamerInfo(compression);
1407 WriteTFileFreeList(); // NOTE: this is written uncompressed
1408
1409 // Update header and TFile record
1410 memcpy(fileSimple.fHeaderBlock, &fileSimple.fControlBlock->fHeader,
1411 fileSimple.fControlBlock->fHeader.GetSize());
1412 R__ASSERT(fileSimple.fControlBlock->fSeekFileRecord + fileSimple.fControlBlock->fFileRecord.GetSize() <
1414 memcpy(fileSimple.fHeaderBlock + fileSimple.fControlBlock->fSeekFileRecord,
1415 &fileSimple.fControlBlock->fFileRecord, fileSimple.fControlBlock->fFileRecord.GetSize());
1416
1417 fileSimple.Flush();
1418
1419 anchorInfo.fLocator.SetNBytesOnStorage(anchorSize);
1420 }
1421 }
1422
1423 return anchorInfo;
1424}
1425
1426std::uint64_t ROOT::Internal::RNTupleFileWriter::WriteBlob(const void *data, size_t nbytes, size_t len)
1427{
1428 auto writeKey = [this](const void *payload, size_t nBytes, size_t length) {
1429 std::uint64_t offset = ReserveBlob(nBytes, length);
1430 WriteIntoReservedBlob(payload, nBytes, offset);
1431 return offset;
1432 };
1433
1434 const std::uint64_t maxKeySize = fNTupleAnchor.fMaxKeySize;
1435 R__ASSERT(maxKeySize > 0);
1436 // We don't need the object length except for seeing compression ratios in TFile::Map()
1437 // Make sure that the on-disk object length fits into the TKey header.
1438 if (static_cast<std::uint64_t>(len) > static_cast<std::uint64_t>(std::numeric_limits<std::uint32_t>::max()))
1439 len = nbytes;
1440
1441 if (nbytes <= maxKeySize) {
1442 // Fast path: only write 1 key.
1443 return writeKey(data, nbytes, len);
1444 }
1445
1446 /**
1447 * Writing a key bigger than the max allowed size. In this case we split the payload
1448 * into multiple keys, reserving the end of the first key payload for pointers to the
1449 * next ones. E.g. if a key needs to be split into 3 chunks, the first chunk will have
1450 * the format:
1451 * +--------------------+
1452 * | |
1453 * | Data |
1454 * |--------------------|
1455 * | pointer to chunk 2 |
1456 * | pointer to chunk 3 |
1457 * +--------------------+
1458 */
1459 const size_t nChunks = ComputeNumChunks(nbytes, maxKeySize);
1460 const size_t nbytesChunkOffsets = (nChunks - 1) * sizeof(std::uint64_t);
1461 const size_t nbytesFirstChunk = maxKeySize - nbytesChunkOffsets;
1462 // Skip writing the first chunk, it will be written last (in the file) below.
1463
1464 const uint8_t *chunkData = reinterpret_cast<const uint8_t *>(data) + nbytesFirstChunk;
1465 size_t remainingBytes = nbytes - nbytesFirstChunk;
1466
1467 const auto chunkOffsetsToWrite = MakeUninitArray<std::uint64_t>(nChunks - 1);
1468 std::uint64_t chunkOffsetIdx = 0;
1469
1470 do {
1471 const size_t bytesNextChunk = std::min<size_t>(remainingBytes, maxKeySize);
1472 const std::uint64_t offset = writeKey(chunkData, bytesNextChunk, bytesNextChunk);
1473
1474 RNTupleSerializer::SerializeUInt64(offset, &chunkOffsetsToWrite[chunkOffsetIdx]);
1475 ++chunkOffsetIdx;
1476
1477 remainingBytes -= bytesNextChunk;
1478 chunkData += bytesNextChunk;
1479
1480 } while (remainingBytes > 0);
1481
1482 // Write the first key, with part of the data and the pointers to (logically) following keys appended.
1483 const std::uint64_t firstOffset = ReserveBlob(maxKeySize, maxKeySize);
1484 WriteIntoReservedBlob(data, nbytesFirstChunk, firstOffset);
1485 const std::uint64_t chunkOffsetsOffset = firstOffset + nbytesFirstChunk;
1486 WriteIntoReservedBlob(chunkOffsetsToWrite.get(), nbytesChunkOffsets, chunkOffsetsOffset);
1487
1488 return firstOffset;
1489}
1490
1491std::uint64_t
1492ROOT::Internal::RNTupleFileWriter::ReserveBlob(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen])
1493{
1494 // ReserveBlob cannot be used to reserve a multi-key blob
1495 R__ASSERT(nbytes <= fNTupleAnchor.GetMaxKeySize());
1496
1497 std::uint64_t offset;
1498 if (auto *fileSimple = std::get_if<RImplSimple>(&fFile)) {
1499 if (fIsBare) {
1500 offset = fileSimple->fKeyOffset;
1501 fileSimple->fKeyOffset += nbytes;
1502 } else {
1503 offset = fileSimple->ReserveBlobKey(nbytes, len, keyBuffer);
1504 }
1505 } else if (auto *fileProper = std::get_if<RImplTFile>(&fFile)) {
1506 offset = fileProper->ReserveBlobKey(nbytes, len, keyBuffer);
1507 } else {
1508 auto &fileRFile = std::get<RImplRFile>(fFile);
1509 offset = fileRFile.ReserveBlobKey(nbytes, len, keyBuffer);
1510 }
1511 return offset;
1512}
1513
1514void ROOT::Internal::RNTupleFileWriter::WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset)
1515{
1516 if (auto *fileSimple = std::get_if<RImplSimple>(&fFile)) {
1517 fileSimple->Write(buffer, nbytes, offset);
1518 } else if (auto *fileProper = std::get_if<RImplTFile>(&fFile)) {
1519 fileProper->Write(buffer, nbytes, offset);
1520 } else {
1521 auto &fileRFile = std::get<RImplRFile>(fFile);
1522 fileRFile.Write(buffer, nbytes, offset);
1523 }
1524}
1525
1526std::uint64_t ROOT::Internal::RNTupleFileWriter::WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader)
1527{
1528 auto offset = WriteBlob(data, nbytes, lenHeader);
1529 fNTupleAnchor.fLenHeader = lenHeader;
1530 fNTupleAnchor.fNBytesHeader = nbytes;
1531 fNTupleAnchor.fSeekHeader = offset;
1532 return offset;
1533}
1534
1535std::uint64_t ROOT::Internal::RNTupleFileWriter::WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter)
1536{
1537 auto offset = WriteBlob(data, nbytes, lenFooter);
1538 fNTupleAnchor.fLenFooter = lenFooter;
1539 fNTupleAnchor.fNBytesFooter = nbytes;
1540 fNTupleAnchor.fSeekFooter = offset;
1541 return offset;
1542}
1543
1545{
1546 RBareFileHeader bareHeader;
1547 bareHeader.fCompress = defaultCompression;
1548 auto &fileSimple = std::get<RImplSimple>(fFile);
1549 fileSimple.Write(&bareHeader, sizeof(bareHeader), 0);
1550 RTFString ntupleName{fNTupleName};
1551 fileSimple.Write(&ntupleName, ntupleName.GetSize());
1552
1553 // Write zero-initialized ntuple to reserve the space; will be overwritten on commit
1554 RTFNTuple ntupleOnDisk;
1555 fileSimple.fControlBlock->fSeekNTuple = fileSimple.fFilePos;
1556 fileSimple.Write(&ntupleOnDisk, ntupleOnDisk.GetSize());
1557 std::uint64_t checksum = 0;
1558 fileSimple.Write(&checksum, sizeof(checksum));
1559 fileSimple.fKeyOffset = fileSimple.fFilePos;
1560}
1561
1563{
1564 // The streamer info record is a TList of TStreamerInfo object. We cannot use
1565 // RNTupleSerializer::SerializeStreamerInfos because that uses TBufferIO::WriteObject.
1566 // This would prepend the streamed TList with self-description information.
1567 // The streamer info record is just the streamed TList.
1568
1569 TList streamerInfoList;
1570 for (auto [_, info] : fStreamerInfoMap) {
1571 streamerInfoList.Add(info);
1572 }
1573
1574 // We will stream the list with a TBufferFile. When reading the streamer info records back,
1575 // the read buffer includes the key and the streamed list. Therefore, we need to start streaming
1576 // with an offset of the key length. Otherwise, the offset for referencing duplicate objects in the
1577 // buffer will point to the wrong places.
1578
1579 // Figure out key length
1580 RTFString strTList{"TList"};
1581 RTFString strStreamerInfo{"StreamerInfo"};
1582 RTFString strStreamerTitle{"Doubly linked list"};
1583 auto &fileSimple = std::get<RImplSimple>(fFile);
1584 fileSimple.fControlBlock->fHeader.SetSeekInfo(fileSimple.fKeyOffset);
1585 auto keyLen = RTFKey(fileSimple.fControlBlock->fHeader.GetSeekInfo(), RTFHeader::kBEGIN, strTList, strStreamerInfo,
1586 strStreamerTitle, 0)
1587 .fKeyLen;
1588
1589 TBufferFile buffer(TBuffer::kWrite, keyLen + 1);
1590 buffer.SetBufferOffset(keyLen);
1591 streamerInfoList.Streamer(buffer);
1592 assert(buffer.Length() > keyLen);
1593 const auto bufPayload = buffer.Buffer() + keyLen;
1594 const auto lenPayload = buffer.Length() - keyLen;
1595
1596 auto zipStreamerInfos = MakeUninitArray<unsigned char>(lenPayload);
1597 auto szZipStreamerInfos = RNTupleCompressor::Zip(bufPayload, lenPayload, compression, zipStreamerInfos.get());
1598
1599 fileSimple.WriteKey(zipStreamerInfos.get(), szZipStreamerInfos, lenPayload,
1600 fileSimple.fControlBlock->fHeader.GetSeekInfo(), RTFHeader::kBEGIN, "TList", "StreamerInfo",
1601 "Doubly linked list");
1602 fileSimple.fControlBlock->fHeader.SetNbytesInfo(fileSimple.fFilePos -
1603 fileSimple.fControlBlock->fHeader.GetSeekInfo());
1604}
1605
1607{
1608 RTFString strEmpty;
1609 RTFString strRNTupleClass{"ROOT::RNTuple"};
1610 RTFString strRNTupleName{fNTupleName};
1611 RTFString strFileName{fFileName};
1612
1613 auto &fileSimple = std::get<RImplSimple>(fFile);
1614 RTFKey keyRNTuple(fileSimple.fControlBlock->fSeekNTuple, RTFHeader::kBEGIN, strRNTupleClass, strRNTupleName,
1615 strEmpty, RTFNTuple::GetSizePlusChecksum(), anchorSize);
1616
1617 fileSimple.fControlBlock->fFileRecord.SetSeekKeys(fileSimple.fKeyOffset);
1618 RTFKeyList keyList{1};
1619 RTFKey keyKeyList(fileSimple.fControlBlock->fFileRecord.GetSeekKeys(), RTFHeader::kBEGIN, strEmpty, strFileName,
1620 strEmpty, keyList.GetSize() + keyRNTuple.fKeyLen);
1621 fileSimple.Write(&keyKeyList, keyKeyList.GetHeaderSize(), fileSimple.fControlBlock->fFileRecord.GetSeekKeys());
1622 fileSimple.Write(&strEmpty, strEmpty.GetSize());
1623 fileSimple.Write(&strFileName, strFileName.GetSize());
1624 fileSimple.Write(&strEmpty, strEmpty.GetSize());
1625 fileSimple.Write(&keyList, keyList.GetSize());
1626 fileSimple.Write(&keyRNTuple, keyRNTuple.GetHeaderSize());
1627 // Write class name, object name, and title for this key.
1628 fileSimple.Write(&strRNTupleClass, strRNTupleClass.GetSize());
1629 fileSimple.Write(&strRNTupleName, strRNTupleName.GetSize());
1630 fileSimple.Write(&strEmpty, strEmpty.GetSize());
1631 fileSimple.fControlBlock->fFileRecord.fNBytesKeys =
1632 fileSimple.fFilePos - fileSimple.fControlBlock->fFileRecord.GetSeekKeys();
1633 fileSimple.fKeyOffset = fileSimple.fFilePos;
1634}
1635
1637{
1638 auto &fileSimple = std::get<RImplSimple>(fFile);
1639 fileSimple.fControlBlock->fHeader.SetSeekFree(fileSimple.fKeyOffset);
1640 RTFString strEmpty;
1641 RTFString strFileName{fFileName};
1642 RTFFreeEntry freeEntry;
1643 RTFKey keyFreeList(fileSimple.fControlBlock->fHeader.GetSeekFree(), RTFHeader::kBEGIN, strEmpty, strFileName,
1644 strEmpty, freeEntry.GetSize());
1645 std::uint64_t firstFree = fileSimple.fControlBlock->fHeader.GetSeekFree() + keyFreeList.GetSize();
1646 freeEntry.Set(firstFree, std::max(2000000000ULL, ((firstFree / 1000000000ULL) + 1) * 1000000000ULL));
1647 fileSimple.WriteKey(&freeEntry, freeEntry.GetSize(), freeEntry.GetSize(),
1648 fileSimple.fControlBlock->fHeader.GetSeekFree(), RTFHeader::kBEGIN, "", fFileName, "");
1649 fileSimple.fControlBlock->fHeader.SetNbytesFree(fileSimple.fFilePos -
1650 fileSimple.fControlBlock->fHeader.GetSeekFree());
1651 fileSimple.fControlBlock->fHeader.SetEnd(fileSimple.fFilePos);
1652}
1653
1655{
1656 RTFString strRNTupleClass{"ROOT::RNTuple"};
1657 RTFString strRNTupleName{fNTupleName};
1658 RTFString strEmpty;
1659
1660 RTFNTuple ntupleOnDisk(fNTupleAnchor);
1661 RUInt64BE checksum{XXH3_64bits(ntupleOnDisk.GetPtrCkData(), ntupleOnDisk.GetSizeCkData())};
1662 auto &fileSimple = std::get<RImplSimple>(fFile);
1663 fileSimple.fControlBlock->fSeekNTuple = fileSimple.fKeyOffset;
1664
1665 char keyBuf[RTFNTuple::GetSizePlusChecksum()];
1666
1667 // concatenate the RNTuple anchor with its checksum
1668 memcpy(keyBuf, &ntupleOnDisk, sizeof(RTFNTuple));
1669 memcpy(keyBuf + sizeof(RTFNTuple), &checksum, sizeof(checksum));
1670
1671 const auto sizeAnchor = sizeof(RTFNTuple) + sizeof(checksum);
1672 char zipAnchor[RTFNTuple::GetSizePlusChecksum()];
1673 auto szZipAnchor = RNTupleCompressor::Zip(keyBuf, sizeAnchor, compression, zipAnchor);
1674
1675 fileSimple.WriteKey(zipAnchor, szZipAnchor, sizeof(keyBuf), fileSimple.fControlBlock->fSeekNTuple, RTFHeader::kBEGIN,
1676 "ROOT::RNTuple", fNTupleName, "");
1677 return szZipAnchor;
1678}
1679
1681{
1682 RTFString strTFile{"TFile"};
1683 RTFString strFileName{fFileName};
1684 RTFString strEmpty;
1685
1686 auto &fileSimple = std::get<RImplSimple>(fFile);
1687 fileSimple.fControlBlock->fHeader = RTFHeader(defaultCompression);
1688
1689 RTFUUID uuid;
1690
1691 // First record of the file: the TFile object at offset kBEGIN (= 100)
1692 RTFKey keyRoot(RTFHeader::kBEGIN, 0, strTFile, strFileName, strEmpty,
1693 sizeof(RTFDirectory) + strFileName.GetSize() + strEmpty.GetSize() + uuid.GetSize());
1694 std::uint32_t nbytesName = keyRoot.fKeyLen + strFileName.GetSize() + 1;
1695 fileSimple.fControlBlock->fFileRecord.fNBytesName = nbytesName;
1696 fileSimple.fControlBlock->fHeader.SetNbytesName(nbytesName);
1697
1698 fileSimple.Write(&keyRoot, keyRoot.GetHeaderSize(), RTFHeader::kBEGIN);
1699 // Write class name, object name, and title for the TFile key.
1700 fileSimple.Write(&strTFile, strTFile.GetSize());
1701 fileSimple.Write(&strFileName, strFileName.GetSize());
1702 fileSimple.Write(&strEmpty, strEmpty.GetSize());
1703 // Write the name and title of the TNamed preceding the TFile entry.
1704 fileSimple.Write(&strFileName, strFileName.GetSize());
1705 fileSimple.Write(&strEmpty, strEmpty.GetSize());
1706 // Will be overwritten on commit
1707 fileSimple.fControlBlock->fSeekFileRecord = fileSimple.fFilePos;
1708 fileSimple.Write(&fileSimple.fControlBlock->fFileRecord, fileSimple.fControlBlock->fFileRecord.GetSize());
1709 fileSimple.Write(&uuid, uuid.GetSize());
1710
1711 // Padding bytes to allow the TFile record to grow for a big file
1712 RUInt32BE padding{0};
1713 for (int i = 0; i < 3; ++i)
1714 fileSimple.Write(&padding, sizeof(padding));
1715 fileSimple.fKeyOffset = fileSimple.fFilePos;
1716}
T ReadBuffer(TBufferFile *buf)
One of the template functions used to read objects from messages.
Definition MPSendRecv.h:158
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
static size_t ComputeNumChunks(size_t nbytes, size_t maxChunkSize)
#define ROOT_VERSION_CODE
Definition RVersion.hxx:24
int Int_t
Signed integer 4 bytes (int).
Definition RtypesCore.h:59
#define ClassDefInlineOverride(name, id)
Definition Rtypes.h:360
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
const Int_t kBEGIN
Definition TFile.cxx:204
char name[80]
Definition TGX11.cxx:148
Binding & operator=(OUT(*fun)(void))
#define _(A, B)
Definition cfortran.h:108
An interface to read from, or write to, a ROOT file, as well as performing other common operations.
Definition RFile.hxx:252
The RKeyBlob writes an invisible key into a TFile.
bool WasAllocatedInAFreeSlot() const
void Reserve(size_t nbytes, std::uint64_t *seekKey)
Register a new key for a data record of size nbytes.
void ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset)
Reads a given byte range from the file into the provided memory buffer.
void LoadStreamerInfo()
Attempts to load the streamer info from the file.
RResult< RNTuple > GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen)
Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary).
RResult< RNTuple > GetNTupleBare(std::string_view ntupleName)
Used when the file container turns out to be a bare file.
ROOT::RResult< void > TryReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset)
Like ReadBuffer but returns a RResult instead of throwing.
std::uint64_t SearchInDirectory(std::uint64_t &offsetDir, std::string_view keyName, std::string_view typeName)
Searches for a key with the given name and type in the key index of the directory starting at offsetD...
ROOT::Internal::RRawFile * fRawFile
The raw file used to read byte ranges.
Definition RMiniFile.hxx:63
RResult< RNTuple > GetNTuple(std::string_view ntupleName)
Extracts header and footer location for the RNTuple identified by ntupleName.
RResult< RNTuple > GetNTupleProper(std::string_view ntuplePath)
Used when the file turns out to be a TFile container.
bool fIsBare
Indicates whether the file is a TFile container or an RNTuple bare file.
Definition RMiniFile.hxx:65
std::uint64_t fMaxKeySize
If fMaxKeySize > 0 and ReadBuffer attempts to read nbytes > maxKeySize, it will assume the blob being...
Definition RMiniFile.hxx:69
Helper class to compress data blocks in the ROOT compression frame format.
static std::size_t Zip(const void *from, std::size_t nbytes, int compression, void *to)
Returns the size of the compressed data, written into the provided output buffer.
Helper class to uncompress data blocks in the ROOT compression frame format.
static void Unzip(const void *from, size_t nbytes, size_t dataLen, void *to)
The nbytes parameter provides the size of the from buffer.
std::uint64_t ReserveBlob(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen]=nullptr)
Reserves a new record as an RBlob key in the file.
void WriteTFileStreamerInfo(int compression)
Write the compressed streamer info record with the description of the RNTuple class.
std::string fNTupleName
The identifier of the RNTuple; A single writer object can only write a single RNTuple but multiple wr...
void WriteTFileKeysList(std::uint64_t anchorSize)
Write the TList with the RNTuple key.
std::uint64_t WriteTFileNTupleKey(int compression)
The only key that will be visible in file->ls(). Returns the size on disk of the anchor object.
void WriteBareFileSkeleton(int defaultCompression)
For a bare file, which is necessarily written by a C file stream, write file header.
std::uint64_t WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader)
Writes the compressed header and registers its location; lenHeader is the size of the uncompressed h...
std::string fFileName
The file name without parent directory; only required when writing with a C file stream.
static std::uint64_t ReserveBlobKey(T &caller, TFile &file, std::size_t nbytes, std::size_t len, unsigned char keyBuffer[kBlobKeyLen])
static std::unique_ptr< RNTupleFileWriter > Append(std::string_view ntupleName, TDirectory &fileOrDirectory, std::uint64_t maxKeySize, bool isHidden)
The directory parameter can also be a TFile object (TFile inherits from TDirectory).
void WriteTFileFreeList()
Last record in the file.
std::unique_ptr< RNTupleFileWriter > CloneAsHidden(std::string_view ntupleName) const
Creates a new RNTupleFileWriter with the same underlying TDirectory as this but writing to a differen...
void WriteTFileSkeleton(int defaultCompression)
For a TFile container written by a C file stream, write the header and TFile object.
RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize, bool isHidden)
void Seek(std::uint64_t offset)
Seek a simple writer to offset.
bool fIsBare
A simple file can either be written as TFile container or as NTuple bare file.
ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t fStreamerInfoMap
Set of streamer info records that should be written to the file.
std::uint64_t WriteBlob(const void *data, size_t nbytes, size_t len)
Writes a new record as an RBlob key into the file.
static std::unique_ptr< RNTupleFileWriter > Recreate(std::string_view ntupleName, std::string_view path, EContainerFormat containerFormat, const ROOT::RNTupleWriteOptions &options)
Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset)
Write into a reserved record; the caller is responsible for making sure that the written byte range i...
static constexpr std::size_t kBlobKeyLen
The key length of a blob. It is always a big key (version > 1000) with class name RBlob.
RNTupleLink Commit(int compression=RCompressionSetting::EDefaults::kUseGeneralPurpose)
Writes the RNTuple key to the file so that the header and footer keys can be found.
static void PrepareBlobKey(std::int64_t offset, size_t nbytes, size_t len, unsigned char buffer[kBlobKeyLen])
Prepares buffer for a new record as an RBlob key at offset.
std::uint64_t WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter)
Writes the compressed footer and registers its location; lenFooter is the size of the uncompressed f...
bool fIsHidden
True if this RNTuple's anchor must be stored as a hidden key (this is the case e.g....
void UpdateStreamerInfos(const ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t &streamerInfos)
Ensures that the streamer info records passed as argument are written to the file.
RNTuple fNTupleAnchor
Header and footer location of the ntuple, written on Commit().
EContainerFormat
For testing purposes, RNTuple data can be written into a bare file container instead of a ROOT file.
A helper class for serialization and deserialization of the RNTuple binary format.
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
void SetType(ELocatorType type)
void SetPosition(std::uint64_t position)
void SetNBytesOnStorage(std::uint64_t nBytesOnStorage)
Common user-tunable settings for storing RNTuples.
std::size_t GetWriteBufferSize() const
std::uint64_t GetMaxKeySize() const
std::uint32_t GetCompression() const
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67
std::uint64_t GetLenFooter() const
Definition RNTuple.hxx:139
std::uint64_t GetSeekHeader() const
Definition RNTuple.hxx:133
std::uint16_t GetVersionMajor() const
Definition RNTuple.hxx:129
std::uint64_t GetNBytesFooter() const
Definition RNTuple.hxx:138
std::uint64_t GetNBytesHeader() const
Definition RNTuple.hxx:134
std::uint16_t GetVersionMinor() const
Definition RNTuple.hxx:130
std::uint16_t GetVersionPatch() const
Definition RNTuple.hxx:131
std::uint64_t GetMaxKeySize() const
Definition RNTuple.hxx:140
std::uint64_t GetLenHeader() const
Definition RNTuple.hxx:135
static TClass * Class()
std::uint64_t GetSeekFooter() const
Definition RNTuple.hxx:137
std::uint16_t GetVersionEpoch() const
Definition RNTuple.hxx:128
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
The concrete implementation of TBuffer for writing/reading to/from a ROOT file or socket.
Definition TBufferFile.h:47
void TagStreamerInfo(TVirtualStreamerInfo *info) override
Mark the classindex of the current file as using this TStreamerInfo.
void SetBufferDisplacement() override
Definition TBufferIO.h:82
void SetParent(TObject *parent)
Set parent owning this buffer.
Definition TBuffer.cxx:269
@ kWrite
Definition TBuffer.h:73
@ kRead
Definition TBuffer.h:73
void SetBufferOffset(Int_t offset=0)
Definition TBuffer.h:93
Int_t Length() const
Definition TBuffer.h:100
char * Buffer() const
Definition TBuffer.h:96
TVirtualStreamerInfo * GetStreamerInfo(Int_t version=0, Bool_t isTransient=kFALSE) const
Returns a pointer to the TVirtualStreamerInfo object for version. If the object does not exist,...
Definition TClass.cxx:4657
Describe directory structure in memory.
Definition TDirectory.h:45
virtual TFile * GetFile() const
Definition TDirectory.h:221
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:130
Bool_t IsBinary() const
Definition TFile.h:347
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition TKey.h:28
Int_t Sizeof() const override
Return the size in bytes of the key header structure.
Definition TKey.cxx:1358
Int_t fVersion
Key version identifier.
Definition TKey.h:41
Int_t fLeft
Number of bytes left in current segment.
Definition TKey.h:50
Short_t fKeylen
Number of bytes for the key itself.
Definition TKey.h:45
TKey(const TKey &)=delete
Long64_t fSeekKey
Location of object on file.
Definition TKey.h:47
virtual void Create(Int_t nbytes, TFile *f=nullptr)
Create a TKey object of specified size.
Definition TKey.cxx:493
virtual char * GetBuffer() const
Definition TKey.h:80
TString fClassName
Object Class name.
Definition TKey.h:49
A doubly linked list.
Definition TList.h:38
void Streamer(TBuffer &) override
Stream all objects in the collection to or from the I/O buffer.
Definition TList.cxx:1323
void Add(TObject *obj) override
Definition TList.h:81
virtual TObjLink * FirstLink() const
Definition TList.h:107
Describes a persistent version of a class.
void BuildCheck(TFile *file=nullptr, Bool_t load=kTRUE) override
Check if built and consistent with the class dictionary.
static TClass * Class()
This class defines a UUID (Universally Unique IDentifier), also known as GUIDs (Globally Unique IDent...
Definition TUUID.h:42
static TUUID UUIDv4()
Create a version 4 (variant 1) UUID according to RFC 4122.
Definition TUUID.cxx:157
void FillBuffer(char *&buffer)
Stream UUID into output buffer.
Definition TUUID.cxx:306
TFile * GetRFileTFile(RFile &rfile)
Definition RFile.cxx:573
RNTuple CreateAnchor(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor, std::uint16_t versionPatch, std::uint64_t seekHeader, std::uint64_t nbytesHeader, std::uint64_t lenHeader, std::uint64_t seekFooter, std::uint64_t nbytesFooter, std::uint64_t lenFooter, std::uint64_t maxKeySize)
Definition RNTuple.cxx:51
std::unique_ptr< T[]> MakeUninitArray(std::size_t size)
Make an array of default-initialized elements.
Helper templated class for swapping bytes; specializations for N={2,4,8} are provided below.
Definition Byteswap.h:124
void Write(const void *buffer, size_t nbytes, std::int64_t offset)
Low-level writing using a TFile.
std::uint64_t ReserveBlobKey(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen]=nullptr)
Reserves an RBlob opaque key as data record and returns the offset of the record.
void AllocateBuffers(std::size_t bufferSize)
bool fDirectIO
Whether the C file stream has been opened with Direct I/O, introducing alignment requirements.
std::uint64_t WriteKey(const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset=-1, std::uint64_t directoryOffset=100, const std::string &className="", const std::string &objectName="", const std::string &title="")
Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the ...
static constexpr int kBlockAlign
Direct I/O requires that all buffers and write lengths are aligned.
std::unique_ptr< ROOT::Internal::RTFileControlBlock > fControlBlock
Keeps track of TFile control structures, which need to be updated on committing the data set.
void Write(const void *buffer, size_t nbytes, std::int64_t offset=-1)
Writes bytes in the open stream, either at fFilePos or at the given offset.
static constexpr std::size_t kHeaderBlockSize
During commit, WriteTFileKeysList() updates fNBytesKeys and fSeekKeys of the RTFFile located at fSeek...
std::uint64_t fFilePos
Keeps track of the seek offset.
FILE * fFile
For the simplest cases, a C file stream can be used for writing.
std::uint64_t ReserveBlobKey(std::size_t nbytes, std::size_t len, unsigned char keyBuffer[kBlobKeyLen]=nullptr)
Reserves an RBlob opaque key as data record and returns the offset of the record.
std::uint64_t fKeyOffset
Keeps track of the next key offset.
TDirectory * fDirectory
A sub directory in fFile or nullptr if the data is stored in the root directory of the file.
void Write(const void *buffer, size_t nbytes, std::int64_t offset)
Low-level writing using a TFile.
std::uint64_t ReserveBlobKey(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen]=nullptr)
Reserves an RBlob opaque key as data record and returns the offset of the record.
If a TFile container is written by a C stream (simple file), on dataset commit, the file header and t...
auto * tt
Definition textangle.C:16