Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RMiniFile.cxx
Go to the documentation of this file.
1/// \file RMiniFile.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-12-22
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include "Rtypes.h"
17#include <ROOT/RConfig.hxx>
18#include <ROOT/RError.hxx>
19#include <ROOT/RMiniFile.hxx>
20#include <ROOT/RRawFile.hxx>
21#include <ROOT/RNTupleZip.hxx>
24
25#include <Byteswap.h>
26#include <TBufferFile.h>
27#include <TError.h>
28#include <TFile.h>
29#include <TKey.h>
30#include <TObjString.h>
32
33#include <xxhash.h>
34
35#include <algorithm>
36#include <cerrno>
37#include <cstdio>
38#include <cstring>
39#include <memory>
40#include <string>
41#include <chrono>
42
43#ifdef R__LINUX
44#include <fcntl.h>
45#endif
46
47#ifndef R__LITTLE_ENDIAN
48#ifdef R__BYTESWAP
49// `R__BYTESWAP` is defined in RConfig.hxx for little-endian architectures; undefined otherwise
50#define R__LITTLE_ENDIAN 1
51#else
52#define R__LITTLE_ENDIAN 0
53#endif
54#endif /* R__LITTLE_ENDIAN */
55
56namespace {
57
58// The following types are used to read and write the TFile binary format
59
/// Unsigned 16-bit integer that is kept in big-endian byte order in memory.
/// Values are converted from/to host byte order on construction, assignment, and read-out.
class RUInt16BE {
public:
   RUInt16BE() = default;
   explicit RUInt16BE(const std::uint16_t val) : fValBE(ConvertByteOrder(val)) {}

   /// Stores `val` (host byte order) as big-endian
   RUInt16BE &operator=(const std::uint16_t val)
   {
      fValBE = ConvertByteOrder(val);
      return *this;
   }

   /// Returns the stored value in host byte order
   operator std::uint16_t() const { return ConvertByteOrder(fValBE); }

private:
   /// Converts between host and big-endian byte order; the operation is its own inverse
   static std::uint16_t ConvertByteOrder(std::uint16_t raw)
   {
#if R__LITTLE_ENDIAN == 1
      return RByteSwap<sizeof(raw)>::bswap(raw);
#else
      return raw;
#endif
   }

   std::uint16_t fValBE = 0;
};
83
/// Unsigned 32-bit integer that is kept in big-endian byte order in memory.
/// Values are converted from/to host byte order on construction, assignment, and read-out.
class RUInt32BE {
public:
   RUInt32BE() = default;
   explicit RUInt32BE(const std::uint32_t val) : fValBE(ConvertByteOrder(val)) {}

   /// Stores `val` (host byte order) as big-endian
   RUInt32BE &operator=(const std::uint32_t val)
   {
      fValBE = ConvertByteOrder(val);
      return *this;
   }

   /// Returns the stored value in host byte order
   operator std::uint32_t() const { return ConvertByteOrder(fValBE); }

private:
   /// Converts between host and big-endian byte order; the operation is its own inverse
   static std::uint32_t ConvertByteOrder(std::uint32_t raw)
   {
#if R__LITTLE_ENDIAN == 1
      return RByteSwap<sizeof(raw)>::bswap(raw);
#else
      return raw;
#endif
   }

   std::uint32_t fValBE = 0;
};
107
/// Signed 32-bit integer that is kept in big-endian byte order in memory.
/// Values are converted from/to host byte order on construction, assignment, and read-out.
class RInt32BE {
public:
   RInt32BE() = default;
   explicit RInt32BE(const std::int32_t val) : fValBE(ConvertByteOrder(val)) {}

   /// Stores `val` (host byte order) as big-endian
   RInt32BE &operator=(const std::int32_t val)
   {
      fValBE = ConvertByteOrder(val);
      return *this;
   }

   /// Returns the stored value in host byte order
   operator std::int32_t() const { return ConvertByteOrder(fValBE); }

private:
   /// Converts between host and big-endian byte order; the operation is its own inverse
   static std::int32_t ConvertByteOrder(std::int32_t raw)
   {
#if R__LITTLE_ENDIAN == 1
      return RByteSwap<sizeof(raw)>::bswap(raw);
#else
      return raw;
#endif
   }

   std::int32_t fValBE = 0;
};
131
/// Unsigned 64-bit integer that is kept in big-endian byte order in memory.
/// Values are converted from/to host byte order on construction, assignment, and read-out.
class RUInt64BE {
public:
   RUInt64BE() = default;
   explicit RUInt64BE(const std::uint64_t val) : fValBE(ConvertByteOrder(val)) {}

   /// Stores `val` (host byte order) as big-endian
   RUInt64BE &operator=(const std::uint64_t val)
   {
      fValBE = ConvertByteOrder(val);
      return *this;
   }

   /// Returns the stored value in host byte order
   operator std::uint64_t() const { return ConvertByteOrder(fValBE); }

private:
   /// Converts between host and big-endian byte order; the operation is its own inverse
   static std::uint64_t ConvertByteOrder(std::uint64_t raw)
   {
#if R__LITTLE_ENDIAN == 1
      return RByteSwap<sizeof(raw)>::bswap(raw);
#else
      return raw;
#endif
   }

   std::uint64_t fValBE = 0;
};
155
156#pragma pack(push, 1)
157/// A name (type, identifies, ...) in the TFile binary format
158struct RTFString {
159 unsigned char fLName{0};
160 char fData[255];
161 RTFString() = default;
162 RTFString(const std::string &str)
163 {
164 // The length of strings with 255 characters and longer are encoded with a 32-bit integer following the first
165 // byte. This is currently not handled.
166 R__ASSERT(str.length() < 255);
167 fLName = str.length();
168 memcpy(fData, str.data(), fLName);
169 }
170 std::size_t GetSize() const
171 {
172 // A length of 255 is special and means that the first byte is followed by a 32-bit integer with the actual
173 // length.
174 R__ASSERT(fLName != 255);
175 return 1 + fLName;
176 }
177};
178
179/// The timestamp format used in TFile; the default constructor initializes with the current time
180struct RTFDatetime {
181 RUInt32BE fDatetime;
182 RTFDatetime()
183 {
184 auto now = std::chrono::system_clock::now();
185 auto tt = std::chrono::system_clock::to_time_t(now);
186 auto tm = *localtime(&tt);
187 fDatetime = (tm.tm_year + 1900 - 1995) << 26 | (tm.tm_mon + 1) << 22 | tm.tm_mday << 17 | tm.tm_hour << 12 |
188 tm.tm_min << 6 | tm.tm_sec;
189 }
190 explicit RTFDatetime(RUInt32BE val) : fDatetime(val) {}
191};
192
193/// The key part of a TFile record excluding the class, object, and title names
194struct RTFKey {
195 RInt32BE fNbytes{0};
196 RUInt16BE fVersion{4};
197 RUInt32BE fObjLen{0};
198 RTFDatetime fDatetime;
199 RUInt16BE fKeyLen{0};
200 RUInt16BE fCycle{1};
201 union {
202 struct {
203 RUInt32BE fSeekKey{0};
204 RUInt32BE fSeekPdir{0};
205 } fInfoShort;
206 struct {
207 RUInt64BE fSeekKey{0};
208 RUInt64BE fSeekPdir{0};
209 } fInfoLong;
210 };
211
212 std::uint32_t fKeyHeaderSize{18 + sizeof(fInfoShort)}; // not part of serialization
213
214 RTFKey() : fInfoShort() {}
215 RTFKey(std::uint64_t seekKey, std::uint64_t seekPdir, const RTFString &clName, const RTFString &objName,
216 const RTFString &titleName, std::size_t szObjInMem, std::size_t szObjOnDisk = 0)
217 {
218 R__ASSERT(szObjInMem <= std::numeric_limits<std::uint32_t>::max());
219 R__ASSERT(szObjOnDisk <= std::numeric_limits<std::uint32_t>::max());
220 fObjLen = szObjInMem;
221 if ((seekKey > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max())) ||
222 (seekPdir > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max()))) {
223 fKeyHeaderSize = 18 + sizeof(fInfoLong);
224 fKeyLen = fKeyHeaderSize + clName.GetSize() + objName.GetSize() + titleName.GetSize();
225 fInfoLong.fSeekKey = seekKey;
226 fInfoLong.fSeekPdir = seekPdir;
227 fVersion = fVersion + 1000;
228 } else {
229 fKeyHeaderSize = 18 + sizeof(fInfoShort);
230 fKeyLen = fKeyHeaderSize + clName.GetSize() + objName.GetSize() + titleName.GetSize();
231 fInfoShort.fSeekKey = seekKey;
232 fInfoShort.fSeekPdir = seekPdir;
233 }
234 fNbytes = fKeyLen + ((szObjOnDisk == 0) ? szObjInMem : szObjOnDisk);
235 }
236
237 void MakeBigKey()
238 {
239 if (fVersion >= 1000)
240 return;
241 std::uint32_t seekKey = fInfoShort.fSeekKey;
242 std::uint32_t seekPdir = fInfoShort.fSeekPdir;
243 fInfoLong.fSeekKey = seekKey;
244 fInfoLong.fSeekPdir = seekPdir;
245 fKeyHeaderSize = fKeyHeaderSize + sizeof(fInfoLong) - sizeof(fInfoShort);
246 fKeyLen = fKeyLen + sizeof(fInfoLong) - sizeof(fInfoShort);
247 fNbytes = fNbytes + sizeof(fInfoLong) - sizeof(fInfoShort);
248 fVersion = fVersion + 1000;
249 }
250
251 std::uint32_t GetSize() const
252 {
253 // Negative size indicates a gap in the file
254 if (fNbytes < 0)
255 return -fNbytes;
256 return fNbytes;
257 }
258
259 std::uint32_t GetHeaderSize() const
260 {
261 if (fVersion >= 1000)
262 return 18 + sizeof(fInfoLong);
263 return 18 + sizeof(fInfoShort);
264 }
265
266 std::uint64_t GetSeekKey() const
267 {
268 if (fVersion >= 1000)
269 return fInfoLong.fSeekKey;
270 return fInfoShort.fSeekKey;
271 }
272};
273
274/// The TFile global header
275struct RTFHeader {
276 char fMagic[4]{'r', 'o', 'o', 't'};
277 RUInt32BE fVersion{(ROOT_VERSION_CODE >> 16) * 10000 + ((ROOT_VERSION_CODE & 0xFF00) >> 8) * 100 +
278 (ROOT_VERSION_CODE & 0xFF)};
279 RUInt32BE fBEGIN{100};
280 union {
281 struct {
282 RUInt32BE fEND{0};
283 RUInt32BE fSeekFree{0};
284 RUInt32BE fNbytesFree{0};
285 RUInt32BE fNfree{1};
286 RUInt32BE fNbytesName{0};
287 unsigned char fUnits{4};
288 RUInt32BE fCompress{0};
289 RUInt32BE fSeekInfo{0};
290 RUInt32BE fNbytesInfo{0};
291 } fInfoShort;
292 struct {
293 RUInt64BE fEND{0};
294 RUInt64BE fSeekFree{0};
295 RUInt32BE fNbytesFree{0};
296 RUInt32BE fNfree{1};
297 RUInt32BE fNbytesName{0};
298 unsigned char fUnits{8};
299 RUInt32BE fCompress{0};
300 RUInt64BE fSeekInfo{0};
301 RUInt32BE fNbytesInfo{0};
302 } fInfoLong;
303 };
304
305 RTFHeader() : fInfoShort() {}
306 RTFHeader(int compression) : fInfoShort() { fInfoShort.fCompress = compression; }
307
308 void SetBigFile()
309 {
310 if (fVersion >= 1000000)
311 return;
312
313 // clang-format off
314 std::uint32_t end = fInfoShort.fEND;
315 std::uint32_t seekFree = fInfoShort.fSeekFree;
316 std::uint32_t nbytesFree = fInfoShort.fNbytesFree;
317 std::uint32_t nFree = fInfoShort.fNfree;
318 std::uint32_t nbytesName = fInfoShort.fNbytesName;
319 std::uint32_t compress = fInfoShort.fCompress;
320 std::uint32_t seekInfo = fInfoShort.fSeekInfo;
321 std::uint32_t nbytesInfo = fInfoShort.fNbytesInfo;
322 fInfoLong.fEND = end;
323 fInfoLong.fSeekFree = seekFree;
324 fInfoLong.fNbytesFree = nbytesFree;
325 fInfoLong.fNfree = nFree;
326 fInfoLong.fNbytesName = nbytesName;
327 fInfoLong.fUnits = 8;
328 fInfoLong.fCompress = compress;
329 fInfoLong.fSeekInfo = seekInfo;
330 fInfoLong.fNbytesInfo = nbytesInfo;
331 fVersion = fVersion + 1000000;
332 // clang-format on
333 }
334
335 bool IsBigFile(std::uint64_t offset = 0) const
336 {
337 return (fVersion >= 1000000) || (offset > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max()));
338 }
339
340 std::uint32_t GetSize() const
341 {
342 std::uint32_t sizeHead = 4 + sizeof(fVersion) + sizeof(fBEGIN);
343 if (IsBigFile())
344 return sizeHead + sizeof(fInfoLong);
345 return sizeHead + sizeof(fInfoShort);
346 }
347
348 std::uint64_t GetEnd() const
349 {
350 if (IsBigFile())
351 return fInfoLong.fEND;
352 return fInfoShort.fEND;
353 }
354
355 void SetEnd(std::uint64_t value)
356 {
357 if (IsBigFile(value)) {
358 SetBigFile();
359 fInfoLong.fEND = value;
360 } else {
361 fInfoShort.fEND = value;
362 }
363 }
364
365 std::uint64_t GetSeekFree() const
366 {
367 if (IsBigFile())
368 return fInfoLong.fSeekFree;
369 return fInfoShort.fSeekFree;
370 }
371
372 void SetSeekFree(std::uint64_t value)
373 {
374 if (IsBigFile(value)) {
375 SetBigFile();
376 fInfoLong.fSeekFree = value;
377 } else {
378 fInfoShort.fSeekFree = value;
379 }
380 }
381
382 void SetNbytesFree(std::uint32_t value)
383 {
384 if (IsBigFile()) {
385 fInfoLong.fNbytesFree = value;
386 } else {
387 fInfoShort.fNbytesFree = value;
388 }
389 }
390
391 void SetNbytesName(std::uint32_t value)
392 {
393 if (IsBigFile()) {
394 fInfoLong.fNbytesName = value;
395 } else {
396 fInfoShort.fNbytesName = value;
397 }
398 }
399
400 std::uint64_t GetSeekInfo() const
401 {
402 if (IsBigFile())
403 return fInfoLong.fSeekInfo;
404 return fInfoShort.fSeekInfo;
405 }
406
407 void SetSeekInfo(std::uint64_t value)
408 {
409 if (IsBigFile(value)) {
410 SetBigFile();
411 fInfoLong.fSeekInfo = value;
412 } else {
413 fInfoShort.fSeekInfo = value;
414 }
415 }
416
417 void SetNbytesInfo(std::uint32_t value)
418 {
419 if (IsBigFile()) {
420 fInfoLong.fNbytesInfo = value;
421 } else {
422 fInfoShort.fNbytesInfo = value;
423 }
424 }
425
426 void SetCompression(std::uint32_t value)
427 {
428 if (IsBigFile()) {
429 fInfoLong.fCompress = value;
430 } else {
431 fInfoShort.fCompress = value;
432 }
433 }
434};
435
436/// A reference to an unused byte-range in a TFile
437struct RTFFreeEntry {
438 RUInt16BE fVersion{1};
439 union {
440 struct {
441 RUInt32BE fFirst{0};
442 RUInt32BE fLast{0};
443 } fInfoShort;
444 struct {
445 RUInt64BE fFirst{0};
446 RUInt64BE fLast{0};
447 } fInfoLong;
448 };
449
450 RTFFreeEntry() : fInfoShort() {}
451 void Set(std::uint64_t first, std::uint64_t last)
452 {
453 if (last > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max())) {
454 fVersion = fVersion + 1000;
455 fInfoLong.fFirst = first;
456 fInfoLong.fLast = last;
457 } else {
458 fInfoShort.fFirst = first;
459 fInfoShort.fLast = last;
460 }
461 }
462 std::uint32_t GetSize() { return (fVersion >= 1000) ? 18 : 10; }
463};
464
465/// The header of the directory key index
466struct RTFKeyList {
467 RUInt32BE fNKeys;
468 std::uint32_t GetSize() const { return sizeof(RTFKeyList); }
469 explicit RTFKeyList(std::uint32_t nKeys) : fNKeys(nKeys) {}
470};
471
472/// A streamed TFile object
473struct RTFFile {
474 RUInt16BE fClassVersion{5};
475 RTFDatetime fDateC;
476 RTFDatetime fDateM;
477 RUInt32BE fNBytesKeys{0};
478 RUInt32BE fNBytesName{0};
479 // The version of the key has to tell whether offsets are 32bit or 64bit long
480 union {
481 struct {
482 RUInt32BE fSeekDir{100};
483 RUInt32BE fSeekParent{0};
484 RUInt32BE fSeekKeys{0};
485 } fInfoShort;
486 struct {
487 RUInt64BE fSeekDir{100};
488 RUInt64BE fSeekParent{0};
489 RUInt64BE fSeekKeys{0};
490 } fInfoLong;
491 };
492
493 RTFFile() : fInfoShort() {}
494
495 // In case of a short TFile record (<2G), 3 padding ints are written after the UUID
496 std::uint32_t GetSize() const
497 {
498 if (fClassVersion >= 1000)
499 return sizeof(RTFFile);
500 return 18 + sizeof(fInfoShort);
501 }
502
503 std::uint64_t GetSeekKeys() const
504 {
505 if (fClassVersion >= 1000)
506 return fInfoLong.fSeekKeys;
507 return fInfoShort.fSeekKeys;
508 }
509
510 void SetSeekKeys(std::uint64_t seekKeys)
511 {
512 if (seekKeys > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max())) {
513 std::uint32_t seekDir = fInfoShort.fSeekDir;
514 std::uint32_t seekParent = fInfoShort.fSeekParent;
515 fInfoLong.fSeekDir = seekDir;
516 fInfoLong.fSeekParent = seekParent;
517 fInfoLong.fSeekKeys = seekKeys;
518 fClassVersion = fClassVersion + 1000;
519 } else {
520 fInfoShort.fSeekKeys = seekKeys;
521 }
522 }
523};
524
525/// A zero UUID stored at the end of the TFile record
526struct RTFUUID {
527 RUInt16BE fVersionClass{1};
528 unsigned char fUUID[16] = {0};
529
530 RTFUUID() = default;
531 std::uint32_t GetSize() const { return sizeof(RTFUUID); }
532};
533
534/// A streamed RNTuple class
535///
536/// NOTE: this must be kept in sync with RNTuple.hxx.
537/// Aside ensuring consistency between the two classes' members, you need to make sure
538/// that fVersionClass matches the class version of RNTuple.
539struct RTFNTuple {
540 RUInt32BE fByteCount{0x40000000 | (sizeof(RTFNTuple) - sizeof(fByteCount))};
541 RUInt16BE fVersionClass{2};
542 RUInt16BE fVersionEpoch{0};
543 RUInt16BE fVersionMajor{0};
544 RUInt16BE fVersionMinor{0};
545 RUInt16BE fVersionPatch{0};
546 RUInt64BE fSeekHeader{0};
547 RUInt64BE fNBytesHeader{0};
548 RUInt64BE fLenHeader{0};
549 RUInt64BE fSeekFooter{0};
550 RUInt64BE fNBytesFooter{0};
551 RUInt64BE fLenFooter{0};
552 RUInt64BE fMaxKeySize{0};
553
554 static constexpr std::uint32_t GetSizePlusChecksum() { return sizeof(RTFNTuple) + sizeof(std::uint64_t); }
555
556 RTFNTuple() = default;
557 explicit RTFNTuple(const ROOT::RNTuple &inMemoryAnchor)
558 {
559 fVersionEpoch = inMemoryAnchor.GetVersionEpoch();
560 fVersionMajor = inMemoryAnchor.GetVersionMajor();
561 fVersionMinor = inMemoryAnchor.GetVersionMinor();
562 fVersionPatch = inMemoryAnchor.GetVersionPatch();
563 fSeekHeader = inMemoryAnchor.GetSeekHeader();
564 fNBytesHeader = inMemoryAnchor.GetNBytesHeader();
565 fLenHeader = inMemoryAnchor.GetLenHeader();
566 fSeekFooter = inMemoryAnchor.GetSeekFooter();
567 fNBytesFooter = inMemoryAnchor.GetNBytesFooter();
568 fLenFooter = inMemoryAnchor.GetLenFooter();
569 fMaxKeySize = inMemoryAnchor.GetMaxKeySize();
570 }
571 std::uint32_t GetSize() const { return sizeof(RTFNTuple); }
572 // The byte count and class version members are not checksummed
573 std::uint32_t GetOffsetCkData() { return sizeof(fByteCount) + sizeof(fVersionClass); }
574 std::uint32_t GetSizeCkData() { return GetSize() - GetOffsetCkData(); }
575 unsigned char *GetPtrCkData() { return reinterpret_cast<unsigned char *>(this) + GetOffsetCkData(); }
576};
577
578/// The bare file global header
579struct RBareFileHeader {
580 char fMagic[7]{'r', 'n', 't', 'u', 'p', 'l', 'e'};
581 RUInt32BE fRootVersion{(ROOT_VERSION_CODE >> 16) * 10000 + ((ROOT_VERSION_CODE & 0xFF00) >> 8) * 100 +
582 (ROOT_VERSION_CODE & 0xFF)};
583 RUInt32BE fFormatVersion{1};
584 RUInt32BE fCompress{0};
585 RTFNTuple fNTuple;
586 // followed by the ntuple name
587};
588#pragma pack(pop)
589
590/// The artificial class name shown for opaque RNTuple keys (see TBasket)
591constexpr char const *kBlobClassName = "RBlob";
592/// The class name of the RNTuple anchor
593constexpr char const *kNTupleClassName = "ROOT::RNTuple";
594
595} // anonymous namespace
596
597namespace ROOT {
598namespace Experimental {
599namespace Internal {
600/// If a TFile container is written by a C stream (simple file), on dataset commit, the file header
601/// and the TFile record need to be updated
603 RTFHeader fHeader;
604 RTFFile fFileRecord;
605 std::uint64_t fSeekNTuple{0}; // Remember the offset for the keys list
606 std::uint64_t fSeekFileRecord{0};
607};
608
609/// The RKeyBlob writes an invisible key into a TFile. That is, a key that is not indexed in the list of keys,
610/// like a TBasket.
611/// NOTE: out of anonymous namespace because otherwise ClassDefInline fails to compile
612/// on some platforms.
613class RKeyBlob : public TKey {
614public:
615 RKeyBlob() = default;
616
// Creates a key attached to `file`: the class name is set to kBlobClassName and the key version is
// raised by 1000 before the key length is computed from Sizeof(), so the length is taken with the
// updated class name and version in place.
617 explicit RKeyBlob(TFile *file) : TKey(file)
618 {
619 fClassName = kBlobClassName;
620 fVersion += 1000;
621 fKeylen = Sizeof();
622 }
623
624 /// Register a new key for a data record of size nbytes
// Calls Create(nbytes) and then reports the key's fSeekKey through the `seekKey` output pointer,
// which is dereferenced unconditionally.
625 void Reserve(size_t nbytes, std::uint64_t *seekKey)
626 {
627 Create(nbytes);
628 *seekKey = fSeekKey;
629 }
630
632};
633
634} // namespace Internal
635} // namespace Experimental
636} // namespace ROOT
637
638// Computes how many chunks do we need to fit `nbytes` of payload, considering that the
639// first chunk also needs to house the offsets of the other chunks and no chunk can
640// be bigger than `maxChunkSize`. When saved to a TFile, each chunk is part of a separate TKey.
641static size_t ComputeNumChunks(size_t nbytes, size_t maxChunkSize)
642{
643 constexpr size_t kChunkOffsetSize = sizeof(std::uint64_t);
644
645 assert(nbytes > maxChunkSize);
646 size_t nChunks = (nbytes + maxChunkSize - 1) / maxChunkSize;
647 assert(nChunks > 1);
648 size_t nbytesTail = nbytes % maxChunkSize;
649 size_t nbytesExtra = (nbytesTail > 0) * (maxChunkSize - nbytesTail);
650 size_t nbytesChunkOffsets = (nChunks - 1) * kChunkOffsetSize;
651 if (nbytesChunkOffsets > nbytesExtra) {
652 ++nChunks;
653 nbytesChunkOffsets += kChunkOffsetSize;
654 }
655
656 // We don't support having more chunkOffsets than what fits in one chunk.
657 // For a reasonable-sized maxKeySize it looks very unlikely that we can have more chunks
658 // than we can fit in the first `maxKeySize` bytes. E.g. for maxKeySize = 1GiB we can fit
659 // 134217728 chunk offsets, making our multi-key blob's capacity exactly 128 PiB.
660 R__ASSERT(nbytesChunkOffsets <= maxChunkSize);
661
662 return nChunks;
663}
664
666
669{
670 char ident[4];
671 ReadBuffer(ident, 4, 0);
672 if (std::string(ident, 4) == "root")
673 return GetNTupleProper(ntupleName);
674 fIsBare = true;
675 return GetNTupleBare(ntupleName);
676}
677
680{
681 RTFHeader fileHeader;
682 ReadBuffer(&fileHeader, sizeof(fileHeader), 0);
683
684 RTFKey key;
685 RTFString name;
686 ReadBuffer(&key, sizeof(key), fileHeader.fBEGIN);
687 // Skip over the entire key length, including the class name, object name, and title stored in it.
688 std::uint64_t offset = fileHeader.fBEGIN + key.fKeyLen;
689 // Skip over the name and title of the TNamed preceding the TFile entry.
690 ReadBuffer(&name, 1, offset);
691 offset += name.GetSize();
692 ReadBuffer(&name, 1, offset);
693 offset += name.GetSize();
694 RTFFile file;
695 ReadBuffer(&file, sizeof(file), offset);
696
697 RUInt32BE nKeys;
698 offset = file.GetSeekKeys();
699 ReadBuffer(&key, sizeof(key), offset);
700 offset += key.fKeyLen;
701 ReadBuffer(&nKeys, sizeof(nKeys), offset);
702 offset += sizeof(nKeys);
703 bool found = false;
704 for (unsigned int i = 0; i < nKeys; ++i) {
705 ReadBuffer(&key, sizeof(key), offset);
706 auto offsetNextKey = offset + key.fKeyLen;
707
708 offset += key.GetHeaderSize();
709 ReadBuffer(&name, 1, offset);
710 ReadBuffer(&name, name.GetSize(), offset);
711 if (std::string_view(name.fData, name.fLName) != kNTupleClassName) {
712 offset = offsetNextKey;
713 continue;
714 }
715 offset += name.GetSize();
716 ReadBuffer(&name, 1, offset);
717 ReadBuffer(&name, name.GetSize(), offset);
718 if (std::string_view(name.fData, name.fLName) == ntupleName) {
719 found = true;
720 break;
721 }
722 offset = offsetNextKey;
723 }
724 if (!found) {
725 return R__FAIL("no RNTuple named '" + std::string(ntupleName) + "' in file '" + fRawFile->GetUrl() + "'");
726 }
727
728 offset = key.GetSeekKey() + key.fKeyLen;
729
730 // size of a RTFNTuple version 2 (min supported version); future anchor versions can grow.
731 constexpr size_t kMinNTupleSize = 78;
732 static_assert(kMinNTupleSize == RTFNTuple::GetSizePlusChecksum());
733 if (key.fObjLen < kMinNTupleSize) {
734 return R__FAIL("invalid anchor size: " + std::to_string(key.fObjLen) + " < " + std::to_string(sizeof(RTFNTuple)));
735 }
736 // The object length can be smaller than the size of RTFNTuple if it comes from a past RNTuple class version,
737 // or larger than it if it comes from a future RNTuple class version.
738 auto bufAnchor = std::make_unique<unsigned char[]>(std::max<size_t>(key.fObjLen, sizeof(RTFNTuple)));
739 RTFNTuple *ntuple = new (bufAnchor.get()) RTFNTuple;
740
741 auto objNbytes = key.GetSize() - key.fKeyLen;
742 ReadBuffer(ntuple, objNbytes, offset);
743 if (objNbytes != key.fObjLen) {
744 RNTupleDecompressor decompressor;
745 decompressor.Unzip(bufAnchor.get(), objNbytes, key.fObjLen);
746 }
747
748 // We require that future class versions only append members and store the checksum in the last 8 bytes
749 // Checksum calculation: strip byte count, class version, fChecksum member
750 auto lenCkData = key.fObjLen - ntuple->GetOffsetCkData() - sizeof(uint64_t);
751 auto ckCalc = XXH3_64bits(ntuple->GetPtrCkData(), lenCkData);
752 uint64_t ckOnDisk;
753
754 RUInt64BE *ckOnDiskPtr = reinterpret_cast<RUInt64BE *>(bufAnchor.get() + key.fObjLen - sizeof(uint64_t));
755 ckOnDisk = static_cast<uint64_t>(*ckOnDiskPtr);
756 if (ckCalc != ckOnDisk) {
757 return R__FAIL("RNTuple anchor checksum mismatch");
758 }
759
760 return CreateAnchor(ntuple->fVersionEpoch, ntuple->fVersionMajor, ntuple->fVersionMinor, ntuple->fVersionPatch,
761 ntuple->fSeekHeader, ntuple->fNBytesHeader, ntuple->fLenHeader, ntuple->fSeekFooter,
762 ntuple->fNBytesFooter, ntuple->fLenFooter, ntuple->fMaxKeySize);
763}
764
767{
768 RBareFileHeader fileHeader;
769 ReadBuffer(&fileHeader, sizeof(fileHeader), 0);
770 RTFString name;
771 auto offset = sizeof(fileHeader);
772 ReadBuffer(&name, 1, offset);
773 ReadBuffer(&name, name.GetSize(), offset);
774 std::string_view foundName(name.fData, name.fLName);
775 if (foundName != ntupleName) {
776 return R__FAIL("expected RNTuple named '" + std::string(ntupleName) + "' but instead found '" +
777 std::string(foundName) + "' in file '" + fRawFile->GetUrl() + "'");
778 }
779 offset += name.GetSize();
780
781 RTFNTuple ntuple;
782 ReadBuffer(&ntuple, sizeof(ntuple), offset);
783 std::uint64_t onDiskChecksum;
784 ReadBuffer(&onDiskChecksum, sizeof(onDiskChecksum), offset + sizeof(ntuple));
785 auto checksum = XXH3_64bits(ntuple.GetPtrCkData(), ntuple.GetSizeCkData());
786 if (checksum != static_cast<uint64_t>(onDiskChecksum))
787 return R__FAIL("RNTuple bare file: anchor checksum mismatch");
788
789 return CreateAnchor(ntuple.fVersionEpoch, ntuple.fVersionMajor, ntuple.fVersionMinor, ntuple.fVersionPatch,
790 ntuple.fSeekHeader, ntuple.fNBytesHeader, ntuple.fLenHeader, ntuple.fSeekFooter,
791 ntuple.fNBytesFooter, ntuple.fLenFooter, ntuple.fMaxKeySize);
792}
793
794void ROOT::Experimental::Internal::RMiniFileReader::ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset)
795{
796 size_t nread;
797 if (fMaxKeySize == 0 || nbytes <= fMaxKeySize) {
798 // Fast path: read single blob
799 nread = fRawFile->ReadAt(buffer, nbytes, offset);
800 } else {
801 // Read chunked blob. See RNTupleFileWriter::WriteBlob() for details.
802 const size_t nChunks = ComputeNumChunks(nbytes, fMaxKeySize);
803 const size_t nbytesChunkOffsets = (nChunks - 1) * sizeof(std::uint64_t);
804 const size_t nbytesFirstChunk = fMaxKeySize - nbytesChunkOffsets;
805 uint8_t *bufCur = reinterpret_cast<uint8_t *>(buffer);
806
807 // Read first chunk
808 nread = fRawFile->ReadAt(bufCur, fMaxKeySize, offset);
809 R__ASSERT(nread == fMaxKeySize);
810 // NOTE: we read the entire chunk in `bufCur`, but we only advance the pointer by `nbytesFirstChunk`,
811 // since the last part of `bufCur` will later be overwritten by the next chunk's payload.
812 // We do this to avoid a second ReadAt to read in the chunk offsets.
813 bufCur += nbytesFirstChunk;
814 nread -= nbytesChunkOffsets;
815
816 const auto chunkOffsets = std::make_unique<std::uint64_t[]>(nChunks - 1);
817 memcpy(chunkOffsets.get(), bufCur, nbytesChunkOffsets);
818
819 size_t remainingBytes = nbytes - nbytesFirstChunk;
820 std::uint64_t *curChunkOffset = &chunkOffsets[0];
821
822 do {
823 std::uint64_t chunkOffset;
824 RNTupleSerializer::DeserializeUInt64(curChunkOffset, chunkOffset);
825 ++curChunkOffset;
826
827 const size_t bytesToRead = std::min<size_t>(fMaxKeySize, remainingBytes);
828 // Ensure we don't read outside of the buffer
829 R__ASSERT(static_cast<size_t>(bufCur - reinterpret_cast<uint8_t *>(buffer)) <= nbytes - bytesToRead);
830
831 auto nbytesRead = fRawFile->ReadAt(bufCur, bytesToRead, chunkOffset);
832 R__ASSERT(nbytesRead == bytesToRead);
833
834 nread += bytesToRead;
835 bufCur += bytesToRead;
836 remainingBytes -= bytesToRead;
837 } while (remainingBytes > 0);
838 }
839 R__ASSERT(nread == nbytes);
840}
841
842////////////////////////////////////////////////////////////////////////////////
843
845
847{
848 static_assert(kHeaderBlockSize % kBlockAlign == 0, "invalid header block size");
849 if (bufferSize % kBlockAlign != 0)
850 throw RException(R__FAIL("Buffer size not a multiple of alignment: " + std::to_string(bufferSize)));
851 fBlockSize = bufferSize;
852
853 std::align_val_t blockAlign{kBlockAlign};
854 fHeaderBlock = static_cast<unsigned char *>(::operator new[](kHeaderBlockSize, blockAlign));
855 memset(fHeaderBlock, 0, kHeaderBlockSize);
856 fBlock = static_cast<unsigned char *>(::operator new[](fBlockSize, blockAlign));
857 memset(fBlock, 0, fBlockSize);
858}
859
861{
862 if (fFile)
863 fclose(fFile);
864
865 std::align_val_t blockAlign{kBlockAlign};
866 if (fHeaderBlock)
867 ::operator delete[](fHeaderBlock, blockAlign);
868 if (fBlock)
869 ::operator delete[](fBlock, blockAlign);
870}
871
namespace {
/// Positions `stream` at `offset` relative to `origin`; uses fseeko64() where R__SEEK64 is defined
/// and plain fseek() otherwise. Returns the result of the underlying seek call.
int FSeek64(FILE *stream, std::int64_t offset, int origin)
{
#ifndef R__SEEK64
   return fseek(stream, offset, origin);
#else
   return fseeko64(stream, offset, origin);
#endif
}
} // namespace
882
884{
885 // Write the last partially filled block, which may still need appropriate alignment for Direct I/O.
886 // If it is the first block, get the updated header block.
887 if (fBlockOffset == 0) {
888 std::size_t headerBlockSize = kHeaderBlockSize;
889 if (headerBlockSize > fFilePos) {
890 headerBlockSize = fFilePos;
891 }
892 memcpy(fBlock, fHeaderBlock, headerBlockSize);
893 }
894
895 std::size_t retval = FSeek64(fFile, fBlockOffset, SEEK_SET);
896 if (retval)
897 throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno)));
898
899 std::size_t lastBlockSize = fFilePos - fBlockOffset;
900 R__ASSERT(lastBlockSize <= fBlockSize);
901 if (fDirectIO) {
902 // Round up to a multiple of kBlockAlign.
903 lastBlockSize += kBlockAlign - 1;
904 lastBlockSize = (lastBlockSize / kBlockAlign) * kBlockAlign;
905 R__ASSERT(lastBlockSize <= fBlockSize);
906 }
907 retval = fwrite(fBlock, 1, lastBlockSize, fFile);
908 if (retval != lastBlockSize)
909 throw RException(R__FAIL(std::string("write failed: ") + strerror(errno)));
910
911 // Write the (updated) header block, unless it was part of the write above.
912 if (fBlockOffset > 0) {
913 retval = FSeek64(fFile, 0, SEEK_SET);
914 if (retval)
915 throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno)));
916
917 retval = fwrite(fHeaderBlock, 1, kHeaderBlockSize, fFile);
918 if (retval != RFileSimple::kHeaderBlockSize)
919 throw RException(R__FAIL(std::string("write failed: ") + strerror(errno)));
920 }
921
922 retval = fflush(fFile);
923 if (retval)
924 throw RException(R__FAIL(std::string("Flush failed: ") + strerror(errno)));
925}
926
928 std::int64_t offset)
929{
930 R__ASSERT(fFile);
931 size_t retval;
932 if ((offset >= 0) && (static_cast<std::uint64_t>(offset) != fFilePos)) {
933 fFilePos = offset;
934 }
935
936 // Keep header block to overwrite on commit.
937 if (fFilePos < kHeaderBlockSize) {
938 std::size_t headerBytes = nbytes;
939 if (fFilePos + headerBytes > kHeaderBlockSize) {
940 headerBytes = kHeaderBlockSize - fFilePos;
941 }
942 memcpy(fHeaderBlock + fFilePos, buffer, headerBytes);
943 }
944
945 R__ASSERT(fFilePos >= fBlockOffset);
946
947 while (nbytes > 0) {
948 std::uint64_t posInBlock = fFilePos % fBlockSize;
949 std::uint64_t blockOffset = fFilePos - posInBlock;
950 if (blockOffset != fBlockOffset) {
951 // Write the block.
952 retval = FSeek64(fFile, fBlockOffset, SEEK_SET);
953 if (retval)
954 throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno)));
955
956 retval = fwrite(fBlock, 1, fBlockSize, fFile);
957 if (retval != fBlockSize)
958 throw RException(R__FAIL(std::string("write failed: ") + strerror(errno)));
959
960 // Null the buffer contents for good measure.
961 memset(fBlock, 0, fBlockSize);
962 }
963
964 fBlockOffset = blockOffset;
965 std::size_t blockSize = nbytes;
966 if (blockSize > fBlockSize - posInBlock) {
967 blockSize = fBlockSize - posInBlock;
968 }
969 memcpy(fBlock + posInBlock, buffer, blockSize);
970 buffer = static_cast<const unsigned char *>(buffer) + blockSize;
971 nbytes -= blockSize;
972 fFilePos += blockSize;
973 }
974}
975
977 const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset, std::uint64_t directoryOffset,
978 const std::string &className, const std::string &objectName, const std::string &title)
979{
980 if (offset > 0)
981 fKeyOffset = offset;
982 RTFString strClass{className};
983 RTFString strObject{objectName};
984 RTFString strTitle{title};
985
986 RTFKey key(fKeyOffset, directoryOffset, strClass, strObject, strTitle, len, nbytes);
987 Write(&key, key.fKeyHeaderSize, fKeyOffset);
988 Write(&strClass, strClass.GetSize());
989 Write(&strObject, strObject.GetSize());
990 Write(&strTitle, strTitle.GetSize());
991 auto offsetData = fFilePos;
992 // The next key starts after the data.
993 fKeyOffset = offsetData + nbytes;
994 if (buffer)
995 Write(buffer, nbytes);
996
997 return offsetData;
998}
999
1000////////////////////////////////////////////////////////////////////////////////
1001
1003 std::int64_t offset)
1004{
1005 R__ASSERT(fFile);
1006 fFile->Seek(offset);
1007 bool rv = fFile->WriteBuffer((char *)(buffer), nbytes);
1008 if (rv)
1009 throw RException(R__FAIL("WriteBuffer failed."));
1010}
1011
/// Writes an RBlob opaque key with the provided buffer as data record and returns the offset of the data record.
/// The key location is obtained from RKeyBlob::Reserve(); the key header and the three key strings are then
/// written manually at consecutive offsets. If buffer is null, only the key is written and the nbytes of
/// payload are left for the caller to fill in.
1012std::uint64_t
1014{
1015 std::uint64_t offsetKey;
1016 RKeyBlob keyBlob(fFile);
1017 // Since it is unknown beforehand if offsetKey is beyond the 2GB limit or not,
1018 // RKeyBlob will always reserve space for a big key (version >= 1000)
1019 keyBlob.Reserve(nbytes, &offsetKey);
1020
// `offset` is the running write position, starting at the reserved key location.
1021 auto offset = offsetKey;
1022 RTFString strClass{kBlobClassName};
1023 RTFString strObject;
1024 RTFString strTitle;
1025 RTFKey keyHeader(offset, offset, strClass, strObject, strTitle, len, nbytes);
1026 // Follow the fact that RKeyBlob is a big key unconditionally (see above)
1027 keyHeader.MakeBigKey();
1028
1029 Write(&keyHeader, keyHeader.fKeyHeaderSize, offset);
1030 offset += keyHeader.fKeyHeaderSize;
1031 Write(&strClass, strClass.GetSize(), offset);
1032 offset += strClass.GetSize();
1033 Write(&strObject, strObject.GetSize(), offset);
1034 offset += strObject.GetSize();
1035 Write(&strTitle, strTitle.GetSize(), offset);
1036 offset += strTitle.GetSize();
// After the header and the strings, the running position is where the payload starts.
1037 auto offsetData = offset;
1038 if (buffer)
1039 Write(buffer, nbytes, offset);
1040
1041 return offsetData;
1042}
1043
1044////////////////////////////////////////////////////////////////////////////////
1045
/// Stores the ntuple name, allocates the control block of the simple-file writer, records maxKeySize in the
/// anchor and registers the streamer info of the RNTuple class in fStreamerInfoMap.
1047 : fNTupleName(name)
1048{
1049 fFileSimple.fControlBlock = std::make_unique<ROOT::Experimental::Internal::RTFileControlBlock>();
1050 fNTupleAnchor.fMaxKeySize = maxKeySize;
// The map is keyed by the number of the streamer info.
1051 auto infoRNTuple = RNTuple::Class()->GetStreamerInfo();
1052 fStreamerInfoMap[infoRNTuple->GetNumber()] = infoRNTuple;
1053}
1054
1056
1057std::unique_ptr<ROOT::Experimental::Internal::RNTupleFileWriter>
1058ROOT::Experimental::Internal::RNTupleFileWriter::Recreate(std::string_view ntupleName, std::string_view path,
1059 EContainerFormat containerFormat,
1060 const RNTupleWriteOptions &options)
1061{
1062 std::string fileName(path);
1063 size_t idxDirSep = fileName.find_last_of("\\/");
1064 if (idxDirSep != std::string::npos) {
1065 fileName.erase(0, idxDirSep + 1);
1066 }
1067#ifdef R__LINUX
1068 int flags = O_WRONLY | O_CREAT | O_TRUNC;
1069#ifdef O_LARGEFILE
1070 // Add the equivalent flag that is passed by fopen64.
1071 flags |= O_LARGEFILE;
1072#endif
1073 if (options.GetUseDirectIO()) {
1074 flags |= O_DIRECT;
1075 }
1076 int fd = open(std::string(path).c_str(), flags, 0666);
1077 FILE *fileStream = fdopen(fd, "wb");
1078#else
1079#ifdef R__SEEK64
1080 FILE *fileStream = fopen64(std::string(path.data(), path.size()).c_str(), "wb");
1081#else
1082 FILE *fileStream = fopen(std::string(path.data(), path.size()).c_str(), "wb");
1083#endif
1084#endif
1085 R__ASSERT(fileStream);
1086 // RNTupleFileWriter::RFileSimple does its own buffering, turn off additional buffering from C stdio.
1087 std::setvbuf(fileStream, nullptr, _IONBF, 0);
1088
1089 auto writer = std::unique_ptr<RNTupleFileWriter>(new RNTupleFileWriter(ntupleName, options.GetMaxKeySize()));
1090 writer->fFileSimple.fFile = fileStream;
1091 writer->fFileSimple.fDirectIO = options.GetUseDirectIO();
1092 writer->fFileSimple.AllocateBuffers(options.GetWriteBufferSize());
1093 writer->fFileName = fileName;
1094
1095 int defaultCompression = options.GetCompression();
1096 switch (containerFormat) {
1097 case EContainerFormat::kTFile: writer->WriteTFileSkeleton(defaultCompression); break;
1098 case EContainerFormat::kBare:
1099 writer->fIsBare = true;
1100 writer->WriteBareFileSkeleton(defaultCompression);
1101 break;
1102 default: R__ASSERT(false && "Internal error: unhandled container format");
1103 }
1104
1105 return writer;
1106}
1107
/// Add a new RNTuple identified by ntupleName to the existing TFile.
/// The returned writer only stores the address of `file`; nothing is written to the file here.
1108std::unique_ptr<ROOT::Experimental::Internal::RNTupleFileWriter>
1110 std::uint64_t maxKeySize)
1111{
1112 auto writer = std::unique_ptr<RNTupleFileWriter>(new RNTupleFileWriter(ntupleName, maxKeySize));
1113 writer->fFileProper.fFile = &file;
1114 return writer;
1115}
1116
/// Ensures that the streamer info records passed as argument are written to the file, by merging them into
/// fStreamerInfoMap. Entries whose key is already present in the map are left unchanged (std::map::insert).
1118 const RNTupleSerializer::StreamerInfoMap_t &streamerInfos)
1119{
1120 fStreamerInfoMap.insert(streamerInfos.cbegin(), streamerInfos.cend());
1121}
1122
1124{
// Writes the RNTuple key to the file so that the header and footer keys can be found.
// Three cases: a proper TFile (the anchor is written as an object), a bare file (anchor and checksum are
// patched into the header block) and a TFile container written through the C file stream (the remaining
// TFile records are written and the header block is updated).
1125 if (fFileProper) {
1126 // Easy case, the ROOT file header and the RNTuple streaming is taken care of by TFile
1127 fFileProper.fFile->WriteObject(&fNTupleAnchor, fNTupleName.c_str());
1128
1129 // Make sure the streamer info records used in the RNTuple are written to the file
// NOTE(review): `buf` is used below but its declaration is not visible in this listing -- confirm against
// the original file.
1131 buf.SetParent(fFileProper.fFile);
1132 for (auto [_, info] : fStreamerInfoMap)
1133 buf.TagStreamerInfo(info);
1134
1135 fFileProper.fFile->Write();
1136 return;
1137 }
1138
1139 // Writing by C file stream: prepare the container format header and stream the RNTuple anchor object
1140 R__ASSERT(fFileSimple);
1141
1142 if (fIsBare) {
1143 RTFNTuple ntupleOnDisk(fNTupleAnchor);
1144 // Compute the checksum
1145 std::uint64_t checksum = XXH3_64bits(ntupleOnDisk.GetPtrCkData(), ntupleOnDisk.GetSizeCkData());
// Overwrite the anchor at fSeekNTuple inside the in-memory header block, with the checksum
// placed directly behind it.
1146 memcpy(fFileSimple.fHeaderBlock + fFileSimple.fControlBlock->fSeekNTuple, &ntupleOnDisk, ntupleOnDisk.GetSize());
1147 memcpy(fFileSimple.fHeaderBlock + fFileSimple.fControlBlock->fSeekNTuple + ntupleOnDisk.GetSize(), &checksum,
1148 sizeof(checksum));
1149 fFileSimple.Flush();
1150 return;
1151 }
1152
// TFile container written by the C file stream: emit the anchor key, the keys list, the streamer info
// record and the free list, in this order.
1153 WriteTFileNTupleKey();
1154 WriteTFileKeysList();
1155 WriteTFileStreamerInfo();
1156 WriteTFileFreeList();
1157
1158 // Update header and TFile record
1159 memcpy(fFileSimple.fHeaderBlock, &fFileSimple.fControlBlock->fHeader, fFileSimple.fControlBlock->fHeader.GetSize());
// The TFile record must lie entirely within the header block before it is copied there.
1160 R__ASSERT(fFileSimple.fControlBlock->fSeekFileRecord + fFileSimple.fControlBlock->fFileRecord.GetSize() <
1161 RFileSimple::kHeaderBlockSize);
1162 memcpy(fFileSimple.fHeaderBlock + fFileSimple.fControlBlock->fSeekFileRecord,
1163 &fFileSimple.fControlBlock->fFileRecord, fFileSimple.fControlBlock->fFileRecord.GetSize());
1164
1165 fFileSimple.Flush();
1166}
1167
1168std::uint64_t ROOT::Experimental::Internal::RNTupleFileWriter::WriteBlob(const void *data, size_t nbytes, size_t len)
1169{
1170 auto writeKey = [this](const void *payload, size_t nBytes, size_t length) {
1171 std::uint64_t offset;
1172 if (fFileSimple) {
1173 if (fIsBare) {
1174 offset = fFileSimple.fKeyOffset;
1175 fFileSimple.Write(payload, nBytes);
1176 fFileSimple.fKeyOffset += nBytes;
1177 } else {
1178 offset = fFileSimple.WriteKey(payload, nBytes, length, -1, 100, kBlobClassName);
1179 }
1180 } else {
1181 offset = fFileProper.WriteKey(payload, nBytes, length);
1182 }
1183 return offset;
1184 };
1185
1186 const std::uint64_t maxKeySize = fNTupleAnchor.fMaxKeySize;
1187 R__ASSERT(maxKeySize > 0);
1188 // We don't need the object length except for seeing compression ratios in TFile::Map()
1189 // Make sure that the on-disk object length fits into the TKey header.
1190 if (static_cast<std::uint64_t>(len) > static_cast<std::uint64_t>(std::numeric_limits<std::uint32_t>::max()))
1191 len = nbytes;
1192
1193 if (nbytes <= maxKeySize) {
1194 // Fast path: only write 1 key.
1195 return writeKey(data, nbytes, len);
1196 }
1197
1198 /**
1199 * Writing a key bigger than the max allowed size. In this case we split the payload
1200 * into multiple keys, reserving the end of the first key payload for pointers to the
1201 * next ones. E.g. if a key needs to be split into 3 chunks, the first chunk will have
1202 * the format:
1203 * +--------------------+
1204 * | |
1205 * | Data |
1206 * |--------------------|
1207 * | pointer to chunk 2 |
1208 * | pointer to chunk 3 |
1209 * +--------------------+
1210 */
1211 const size_t nChunks = ComputeNumChunks(nbytes, maxKeySize);
1212 const size_t nbytesChunkOffsets = (nChunks - 1) * sizeof(std::uint64_t);
1213 const size_t nbytesFirstChunk = maxKeySize - nbytesChunkOffsets;
1214 // Skip writing the first chunk, it will be written last (in the file) below.
1215
1216 const uint8_t *chunkData = reinterpret_cast<const uint8_t *>(data) + nbytesFirstChunk;
1217 size_t remainingBytes = nbytes - nbytesFirstChunk;
1218
1219 const auto chunkOffsetsToWrite = std::make_unique<std::uint64_t[]>(nChunks - 1);
1220 std::uint64_t chunkOffsetIdx = 0;
1221
1222 do {
1223 const size_t bytesNextChunk = std::min<size_t>(remainingBytes, maxKeySize);
1224 const std::uint64_t offset = writeKey(chunkData, bytesNextChunk, bytesNextChunk);
1225
1226 RNTupleSerializer::SerializeUInt64(offset, &chunkOffsetsToWrite[chunkOffsetIdx]);
1227 ++chunkOffsetIdx;
1228
1229 remainingBytes -= bytesNextChunk;
1230 chunkData += bytesNextChunk;
1231
1232 } while (remainingBytes > 0);
1233
1234 // Write the first key, with part of the data and the pointers to (logically) following keys appended.
1235 const std::uint64_t firstOffset = ReserveBlob(maxKeySize, maxKeySize);
1236 WriteIntoReservedBlob(data, nbytesFirstChunk, firstOffset);
1237 const std::uint64_t chunkOffsetsOffset = firstOffset + nbytesFirstChunk;
1238 WriteIntoReservedBlob(chunkOffsetsToWrite.get(), nbytesChunkOffsets, chunkOffsetsOffset);
1239
1240 return firstOffset;
1241}
1242
1244{
// Reserves a new record as an RBlob key in the file and returns the offset of its (still unwritten) payload.
// For a bare file only fKeyOffset is advanced; otherwise a key is written with a null data buffer.
1245 // ReserveBlob cannot be used to reserve a multi-key blob
1246 R__ASSERT(nbytes <= fNTupleAnchor.GetMaxKeySize());
1247
1248 std::uint64_t offset;
1249 if (fFileSimple) {
1250 if (fIsBare) {
1251 offset = fFileSimple.fKeyOffset;
1252 fFileSimple.fKeyOffset += nbytes;
1253 } else {
1254 offset = fFileSimple.WriteKey(/*buffer=*/nullptr, nbytes, len, -1, 100, kBlobClassName);
1255 }
1256 } else {
1257 offset = fFileProper.WriteKey(/*buffer=*/nullptr, nbytes, len);
1258 }
1259 return offset;
1260}
1261
/// Write into a reserved record: forwards buffer, nbytes and offset to the Write() of the active backend
/// (fFileSimple if set, fFileProper otherwise). No range check is done here; the caller has to make sure
/// the written byte range lies within the reserved record.
1263 std::int64_t offset)
1264{
1265 if (fFileSimple) {
1266 fFileSimple.Write(buffer, nbytes, offset);
1267 } else {
1268 fFileProper.Write(buffer, nbytes, offset);
1269 }
1270}
1271
/// Writes the compressed header as a blob and registers its location in the anchor:
/// lenHeader (size of the uncompressed header), nbytes (on-disk size) and the returned offset.
1272std::uint64_t
1274{
1275 auto offset = WriteBlob(data, nbytes, lenHeader);
1276 fNTupleAnchor.fLenHeader = lenHeader;
1277 fNTupleAnchor.fNBytesHeader = nbytes;
1278 fNTupleAnchor.fSeekHeader = offset;
1279 return offset;
1280}
1281
/// Writes the compressed footer as a blob and registers its location in the anchor:
/// lenFooter (size of the uncompressed footer), nbytes (on-disk size) and the returned offset.
1282std::uint64_t
1284{
1285 auto offset = WriteBlob(data, nbytes, lenFooter);
1286 fNTupleAnchor.fLenFooter = lenFooter;
1287 fNTupleAnchor.fNBytesFooter = nbytes;
1288 fNTupleAnchor.fSeekFooter = offset;
1289 return offset;
1290}
1291
1293{
// For a bare file, write the file header: the RBareFileHeader at offset 0, the ntuple name, and a
// placeholder for the anchor plus its checksum. Data records start right behind the placeholder.
1294 RBareFileHeader bareHeader;
1295 bareHeader.fCompress = defaultCompression;
1296 fFileSimple.Write(&bareHeader, sizeof(bareHeader), 0);
1297 RTFString ntupleName{fNTupleName};
1298 fFileSimple.Write(&ntupleName, ntupleName.GetSize());
1299
1300 // Write zero-initialized ntuple to reserve the space; will be overwritten on commit
1301 RTFNTuple ntupleOnDisk;
// Remember where the anchor lives so that it can be found again when it is overwritten.
1302 fFileSimple.fControlBlock->fSeekNTuple = fFileSimple.fFilePos;
1303 fFileSimple.Write(&ntupleOnDisk, ntupleOnDisk.GetSize());
1304 std::uint64_t checksum = 0;
1305 fFileSimple.Write(&checksum, sizeof(checksum));
1306 fFileSimple.fKeyOffset = fFileSimple.fFilePos;
1307}
1308
1310{
// Write the compressed streamer info record: a TList with all entries of fStreamerInfoMap, stored as a key
// at the current fKeyOffset. The header's SeekInfo and NbytesInfo fields are updated accordingly.
1311 // The streamer info record is a TList of TStreamerInfo object. We cannot use
1312 // RNTupleSerializer::SerializeStreamerInfos because that uses TBufferIO::WriteObject.
1313 // This would prepend the streamed TList with self-description information.
1314 // The streamer info record is just the streamed TList.
1315
1316 TList streamerInfoList;
1317 for (auto [_, info] : fStreamerInfoMap) {
1318 streamerInfoList.Add(info);
1319 }
1320
1321 // We will stream the list with a TBufferFile. When reading the streamer info records back,
1322 // the read buffer includes the key and the streamed list. Therefore, we need to start streaming
1323 // with an offset of the key length. Otherwise, the offset for referencing duplicate objects in the
1324 // buffer will point to the wrong places.
1325
1326 // Figure out key length
1327 RTFString strTList{"TList"};
1328 RTFString strStreamerInfo{"StreamerInfo"};
1329 RTFString strStreamerTitle{"Doubly linked list"};
1330 fFileSimple.fControlBlock->fHeader.SetSeekInfo(fFileSimple.fKeyOffset);
// A temporary key is constructed only to read off fKeyLen; it is not written.
1331 auto keyLen =
1332 RTFKey(fFileSimple.fControlBlock->fHeader.GetSeekInfo(), 100, strTList, strStreamerInfo, strStreamerTitle, 0)
1333 .fKeyLen;
1334
1335 TBufferFile buffer(TBuffer::kWrite, keyLen + 1);
1336 buffer.SetBufferOffset(keyLen);
1337 streamerInfoList.Streamer(buffer);
1338 assert(buffer.Length() > keyLen);
// The payload is what was streamed behind the first keyLen bytes of the buffer.
1339 const auto bufPayload = buffer.Buffer() + keyLen;
1340 const auto lenPayload = buffer.Length() - keyLen;
1341
// Compress with compression setting 1 into a destination buffer of lenPayload bytes.
1342 RNTupleCompressor compressor;
1343 auto zipStreamerInfos = std::make_unique<unsigned char[]>(lenPayload);
1344 auto szZipStreamerInfos = compressor.Zip(bufPayload, lenPayload, 1, zipStreamerInfos.get());
1345
1346 fFileSimple.WriteKey(zipStreamerInfos.get(), szZipStreamerInfos, lenPayload,
1347 fFileSimple.fControlBlock->fHeader.GetSeekInfo(), 100, "TList", "StreamerInfo",
1348 "Doubly linked list");
// NbytesInfo covers everything from the start of the key to the current file position.
1349 fFileSimple.fControlBlock->fHeader.SetNbytesInfo(fFileSimple.fFilePos -
1350 fFileSimple.fControlBlock->fHeader.GetSeekInfo());
1351}
1352
1354{
// Write the keys list record containing the RNTuple key. The record is placed at the current fKeyOffset,
// which is stored as SeekKeys in the file record. It consists of the key of the list itself, the
// RTFKeyList object and a copy of the RNTuple key (header and strings, without payload).
1355 RTFString strEmpty;
1356 RTFString strRNTupleClass{"ROOT::RNTuple"};
1357 RTFString strRNTupleName{fNTupleName};
1358 RTFString strFileName{fFileName};
1359
// Key of the anchor, located at fSeekNTuple.
1360 RTFKey keyRNTuple(fFileSimple.fControlBlock->fSeekNTuple, 100, strRNTupleClass, strRNTupleName, strEmpty,
1361 RTFNTuple::GetSizePlusChecksum());
1362
1363 fFileSimple.fControlBlock->fFileRecord.SetSeekKeys(fFileSimple.fKeyOffset);
1364 RTFKeyList keyList{1};
// The payload of the keys list is the list object plus the full key of the RNTuple.
1365 RTFKey keyKeyList(fFileSimple.fControlBlock->fFileRecord.GetSeekKeys(), 100, strEmpty, strFileName, strEmpty,
1366 keyList.GetSize() + keyRNTuple.fKeyLen);
1367 fFileSimple.Write(&keyKeyList, keyKeyList.fKeyHeaderSize, fFileSimple.fControlBlock->fFileRecord.GetSeekKeys());
1368 fFileSimple.Write(&strEmpty, strEmpty.GetSize());
1369 fFileSimple.Write(&strFileName, strFileName.GetSize());
1370 fFileSimple.Write(&strEmpty, strEmpty.GetSize());
1371 fFileSimple.Write(&keyList, keyList.GetSize());
1372 fFileSimple.Write(&keyRNTuple, keyRNTuple.fKeyHeaderSize);
1373 // Write class name, object name, and title for this key.
1374 fFileSimple.Write(&strRNTupleClass, strRNTupleClass.GetSize());
1375 fFileSimple.Write(&strRNTupleName, strRNTupleName.GetSize());
1376 fFileSimple.Write(&strEmpty, strEmpty.GetSize());
// fNBytesKeys is the total size of the record just written; the next key follows right after it.
1377 fFileSimple.fControlBlock->fFileRecord.fNBytesKeys =
1378 fFileSimple.fFilePos - fFileSimple.fControlBlock->fFileRecord.GetSeekKeys();
1379 fFileSimple.fKeyOffset = fFileSimple.fFilePos;
1380}
1381
1383{
// Write the free list record at the current fKeyOffset, with a single free entry that starts directly
// behind the record. Updates SeekFree, NbytesFree and End in the file header.
1384 fFileSimple.fControlBlock->fHeader.SetSeekFree(fFileSimple.fKeyOffset);
1385 RTFString strEmpty;
1386 RTFString strFileName{fFileName};
1387 RTFFreeEntry freeEntry;
// This key is only constructed to compute the size of the record; the record itself is written by
// WriteKey() below.
1388 RTFKey keyFreeList(fFileSimple.fControlBlock->fHeader.GetSeekFree(), 100, strEmpty, strFileName, strEmpty,
1389 freeEntry.GetSize());
1390 std::uint64_t firstFree = fFileSimple.fControlBlock->fHeader.GetSeekFree() + keyFreeList.GetSize();
// Second argument: the next multiple of 1e9 above firstFree, but at least 2e9.
1391 freeEntry.Set(firstFree, std::max(2000000000ULL, ((firstFree / 1000000000ULL) + 1) * 1000000000ULL));
1392 fFileSimple.WriteKey(&freeEntry, freeEntry.GetSize(), freeEntry.GetSize(),
1393 fFileSimple.fControlBlock->fHeader.GetSeekFree(), 100, "", fFileName, "");
1394 fFileSimple.fControlBlock->fHeader.SetNbytesFree(fFileSimple.fFilePos -
1395 fFileSimple.fControlBlock->fHeader.GetSeekFree());
// The file ends at the current file position, i.e. behind the free list record.
1396 fFileSimple.fControlBlock->fHeader.SetEnd(fFileSimple.fFilePos);
1397}
1398
1400{
// Writes the anchor as a key of class "ROOT::RNTuple" at the current fKeyOffset and stores that position
// in fSeekNTuple. The key payload is the on-disk anchor followed by its XXH3 checksum.
// NOTE(review): strRNTupleClass, strRNTupleName and strEmpty are not referenced below.
1401 RTFString strRNTupleClass{"ROOT::RNTuple"};
1402 RTFString strRNTupleName{fNTupleName};
1403 RTFString strEmpty;
1404
1405 RTFNTuple ntupleOnDisk(fNTupleAnchor);
1406 RUInt64BE checksum{XXH3_64bits(ntupleOnDisk.GetPtrCkData(), ntupleOnDisk.GetSizeCkData())};
1407 fFileSimple.fControlBlock->fSeekNTuple = fFileSimple.fKeyOffset;
1408
1409 char keyBuf[RTFNTuple::GetSizePlusChecksum()];
1410
1411 // concatenate the RNTuple anchor with its checksum
1412 memcpy(keyBuf, &ntupleOnDisk, sizeof(RTFNTuple));
1413 memcpy(keyBuf + sizeof(RTFNTuple), &checksum, sizeof(checksum));
1414
// Payload size and object length are both given as the full buffer size.
1415 fFileSimple.WriteKey(keyBuf, sizeof(keyBuf), sizeof(keyBuf), fFileSimple.fControlBlock->fSeekNTuple, 100,
1416 "ROOT::RNTuple", fNTupleName, "");
1417}
1418
1420{
// For a TFile container written by a C file stream, write the header and TFile object: initializes the
// file header with the default compression and writes the TFile key at offset 100, followed by the file
// record, the UUID and padding. Keys for data records start at the resulting file position.
1421 RTFString strTFile{"TFile"};
1422 RTFString strFileName{fFileName};
1423 RTFString strEmpty;
1424
1425 fFileSimple.fControlBlock->fHeader = RTFHeader(defaultCompression);
1426
1427 RTFUUID uuid;
1428
1429 // First record of the file: the TFile object at offset 100
1430 RTFKey keyRoot(100, 0, strTFile, strFileName, strEmpty,
1431 sizeof(RTFFile) + strFileName.GetSize() + strEmpty.GetSize() + uuid.GetSize());
// The same value is stored in both the file record and the file header.
1432 std::uint32_t nbytesName = keyRoot.fKeyLen + strFileName.GetSize() + 1;
1433 fFileSimple.fControlBlock->fFileRecord.fNBytesName = nbytesName;
1434 fFileSimple.fControlBlock->fHeader.SetNbytesName(nbytesName);
1435
1436 fFileSimple.Write(&keyRoot, keyRoot.fKeyHeaderSize, 100);
1437 // Write class name, object name, and title for the TFile key.
1438 fFileSimple.Write(&strTFile, strTFile.GetSize());
1439 fFileSimple.Write(&strFileName, strFileName.GetSize());
1440 fFileSimple.Write(&strEmpty, strEmpty.GetSize());
1441 // Write the name and title of the TNamed preceding the TFile entry.
1442 fFileSimple.Write(&strFileName, strFileName.GetSize());
1443 fFileSimple.Write(&strEmpty, strEmpty.GetSize());
1444 // Will be overwritten on commit
1445 fFileSimple.fControlBlock->fSeekFileRecord = fFileSimple.fFilePos;
1446 fFileSimple.Write(&fFileSimple.fControlBlock->fFileRecord, fFileSimple.fControlBlock->fFileRecord.GetSize());
1447 fFileSimple.Write(&uuid, uuid.GetSize());
1448
1449 // Padding bytes to allow the TFile record to grow for a big file
1450 RUInt32BE padding{0};
1451 for (int i = 0; i < 3; ++i)
1452 fFileSimple.Write(&padding, sizeof(padding));
1453 fFileSimple.fKeyOffset = fFileSimple.fFilePos;
1454}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
static size_t ComputeNumChunks(size_t nbytes, size_t maxChunkSize)
#define ROOT_VERSION_CODE
Definition RVersion.hxx:24
#define ClassDefInlineOverride(name, id)
Definition Rtypes.h:358
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
char name[80]
Definition TGX11.cxx:110
Binding & operator=(OUT(*fun)(void))
void ReadBuffer(char *&buffer) override
T1 fFirst
Definition X11Events.mm:86
#define _(A, B)
Definition cfortran.h:108
The RKeyBlob writes an invisible key into a TFile.
void Reserve(size_t nbytes, std::uint64_t *seekKey)
Register a new key for a data record of size nbytes.
RResult< RNTuple > GetNTupleProper(std::string_view ntupleName)
Used when the file turns out to be a TFile container.
RResult< RNTuple > GetNTupleBare(std::string_view ntupleName)
Used when the file container turns out to be a bare file.
RResult< RNTuple > GetNTuple(std::string_view ntupleName)
Extracts header and footer location for the RNTuple identified by ntupleName.
void ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset)
Reads a given byte range from the file into the provided memory buffer.
Helper class to compress data blocks in the ROOT compression frame format.
size_t Zip(const void *from, size_t nbytes, int compression, Writer_t fnWriter)
Returns the size of the compressed data.
Helper class to uncompress data blocks in the ROOT compression frame format.
static void Unzip(const void *from, size_t nbytes, size_t dataLen, void *to)
The nbytes parameter provides the size of the from buffer.
Write RNTuple data blocks in a TFile or a bare file container.
RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize)
std::uint64_t WriteBlob(const void *data, size_t nbytes, size_t len)
Writes a new record as an RBlob key into the file.
std::uint64_t WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter)
Writes the compressed footer and registers its location; lenFooter is the size of the uncompressed f...
static std::unique_ptr< RNTupleFileWriter > Append(std::string_view ntupleName, TFile &file, std::uint64_t maxKeySize)
Add a new RNTuple identified by ntupleName to the existing TFile.
void WriteTFileKeysList()
Write the TList with the RNTuple key.
void UpdateStreamerInfos(const RNTupleSerializer::StreamerInfoMap_t &streamerInfos)
Ensures that the streamer info records passed as argument are written to the file.
void Commit()
Writes the RNTuple key to the file so that the header and footer keys can be found.
std::uint64_t ReserveBlob(size_t nbytes, size_t len)
Reserves a new record as an RBlob key in the file.
RFileSimple fFileSimple
For simple use cases, survives without libRIO dependency.
void WriteTFileNTupleKey()
The only key that will be visible in file->ls()
void WriteTFileFreeList()
Last record in the file.
EContainerFormat
For testing purposes, RNTuple data can be written into a bare file container instead of a ROOT file.
static std::unique_ptr< RNTupleFileWriter > Recreate(std::string_view ntupleName, std::string_view path, EContainerFormat containerFormat, const RNTupleWriteOptions &options)
Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
void WriteTFileSkeleton(int defaultCompression)
For a TFile container written by a C file stream, write the header and TFile object.
void WriteBareFileSkeleton(int defaultCompression)
For a bare file, which is necessarily written by a C file stream, write file header.
void WriteTFileStreamerInfo()
Write the compressed streamer info record with the description of the RNTuple class.
std::uint64_t WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader)
Writes the compressed header and registers its location; lenHeader is the size of the uncompressed h...
RNTuple fNTupleAnchor
Header and footer location of the ntuple, written on Commit()
void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset)
Write into a reserved record; the caller is responsible for making sure that the written byte range i...
RNTupleSerializer::StreamerInfoMap_t fStreamerInfoMap
Set of streamer info records that should be written to the file.
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Common user-tunable settings for storing ntuples.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:69
std::uint64_t GetLenFooter() const
Definition RNTuple.hxx:128
std::uint64_t GetSeekHeader() const
Definition RNTuple.hxx:122
std::uint16_t GetVersionMajor() const
Definition RNTuple.hxx:118
std::uint64_t GetNBytesFooter() const
Definition RNTuple.hxx:127
std::uint64_t GetNBytesHeader() const
Definition RNTuple.hxx:123
std::uint64_t fMaxKeySize
The maximum size for a TKey payload. Payloads bigger than this size will be written as multiple blobs...
Definition RNTuple.hxx:109
std::uint16_t GetVersionMinor() const
Definition RNTuple.hxx:119
std::uint16_t GetVersionPatch() const
Definition RNTuple.hxx:120
std::uint64_t GetMaxKeySize() const
Definition RNTuple.hxx:129
std::uint64_t GetLenHeader() const
Definition RNTuple.hxx:124
static TClass * Class()
std::uint64_t GetSeekFooter() const
Definition RNTuple.hxx:126
std::uint16_t GetVersionEpoch() const
Definition RNTuple.hxx:117
The concrete implementation of TBuffer for writing/reading to/from a ROOT file or socket.
Definition TBufferFile.h:47
void TagStreamerInfo(TVirtualStreamerInfo *info) override
Mark the classindex of the current file as using this TStreamerInfo.
void SetParent(TObject *parent)
Set parent owning this buffer.
Definition TBuffer.cxx:270
@ kWrite
Definition TBuffer.h:73
void SetBufferOffset(Int_t offset=0)
Definition TBuffer.h:93
Int_t Length() const
Definition TBuffer.h:100
char * Buffer() const
Definition TBuffer.h:96
TVirtualStreamerInfo * GetStreamerInfo(Int_t version=0, Bool_t isTransient=kFALSE) const
returns a pointer to the TVirtualStreamerInfo object for version If the object does not exist,...
Definition TClass.cxx:4668
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition TKey.h:28
Int_t Sizeof() const override
Return the size in bytes of the key header structure.
Definition TKey.cxx:1342
Int_t fVersion
Key version identifier.
Definition TKey.h:39
Short_t fKeylen
Number of bytes for the key itself.
Definition TKey.h:43
Long64_t fSeekKey
Location of object on file.
Definition TKey.h:45
virtual void Create(Int_t nbytes, TFile *f=nullptr)
Create a TKey object of specified size.
Definition TKey.cxx:460
TString fClassName
Object Class name.
Definition TKey.h:47
A doubly linked list.
Definition TList.h:38
void Streamer(TBuffer &) override
Stream all objects in the collection to or from the I/O buffer.
Definition TList.cxx:1189
void Add(TObject *obj) override
Definition TList.h:81
#define Swap(a, b)
Definition geom.c:201
RNTuple CreateAnchor(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor, std::uint16_t versionPatch, std::uint64_t seekHeader, std::uint64_t nbytesHeader, std::uint64_t lenHeader, std::uint64_t seekFooter, std::uint64_t nbytesFooter, std::uint64_t lenFooter, std::uint64_t maxKeySize)
Definition RNTuple.cxx:64
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Helper templated class for swapping bytes; specializations for N={2,4,8} are provided below.
Definition Byteswap.h:124
void Write(const void *buffer, size_t nbytes, std::int64_t offset)
Low-level writing using a TFile.
std::uint64_t WriteKey(const void *buffer, size_t nbytes, size_t len)
Writes an RBlob opaque key with the provided buffer as data record and returns the offset of the reco...
std::unique_ptr< ROOT::Experimental::Internal::RTFileControlBlock > fControlBlock
Keeps track of TFile control structures, which need to be updated on committing the data set.
void Write(const void *buffer, size_t nbytes, std::int64_t offset=-1)
Writes bytes in the open stream, either at fFilePos or at the given offset.
std::uint64_t WriteKey(const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset=-1, std::uint64_t directoryOffset=100, const std::string &className="", const std::string &objectName="", const std::string &title="")
Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the ...
If a TFile container is written by a C stream (simple file), on dataset commit, the file header and t...
auto * tt
Definition textangle.C:16