Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RNTupleFillContext.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleFillContext.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-22
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleFillContext
17#define ROOT7_RNTupleFillContext
18
19#include <ROOT/RConfig.hxx> // for R__unlikely
20#include <ROOT/REntry.hxx>
21#include <ROOT/RError.hxx>
22#include <ROOT/RPageStorage.hxx>
25#include <ROOT/RNTupleModel.hxx>
26#include <ROOT/RNTupleUtil.hxx>
27
28#include <cstddef>
29#include <cstdint>
30#include <memory>
31#include <vector>
32
33namespace ROOT {
34namespace Experimental {
35
36// clang-format off
37/**
38\class ROOT::Experimental::RNTupleFillContext
39\ingroup NTuple
40\brief A context for filling entries (data) into clusters of an RNTuple
41
42An output cluster can be filled with entries. The caller has to make sure that the data that gets filled into a cluster
43is not modified for the duration of the Fill() call. The fill call serializes the C++ object into the column format and
44writes data into the corresponding column page buffers. Writing of the buffers to storage is deferred and can be
45triggered by FlushCluster() or by destructing the context. On I/O errors, an exception is thrown.
46
47Instances of this class are not meant to be used in isolation and can be created from an RNTupleParallelWriter. For
48sequential writing, please refer to RNTupleWriter.
49*/
50// clang-format on
52 friend class ROOT::RNTupleWriter;
54
55private:
56 std::unique_ptr<Internal::RPageSink> fSink;
57 /// Needs to be destructed before fSink
58 std::unique_ptr<ROOT::RNTupleModel> fModel;
59
61
64 /// Keeps track of the number of bytes written into the current cluster
65 std::size_t fUnzippedClusterSize = 0;
66 /// The total number of bytes written to storage (i.e., after compression)
67 std::uint64_t fNBytesFlushed = 0;
68 /// The total number of bytes filled into all clusters committed so far,
69 /// i.e. the uncompressed size of the written clusters
70 std::uint64_t fNBytesFilled = 0;
71 /// Limit for committing cluster no matter the other tunables
73 /// Estimator of uncompressed cluster size, taking into account the estimated compression ratio
75
76 /// Whether to enable staged cluster committing, where only an explicit call to CommitStagedClusters() will logically
77 /// append the clusters to the RNTuple.
79 /// Vector of currently staged clusters.
80 std::vector<Internal::RPageSink::RStagedCluster> fStagedClusters;
81
82 RNTupleFillContext(std::unique_ptr<ROOT::RNTupleModel> model, std::unique_ptr<Internal::RPageSink> sink);
85
86public:
88
89 /// Fill an entry into this context, but don't commit the cluster. The calling code must pass an RNTupleFillStatus
90 /// and check RNTupleFillStatus::ShouldFlushCluster.
91 ///
92 /// This method will perform a light check whether the entry comes from the context's own model.
94 {
95 if (R__unlikely(entry.GetModelId() != fModel->GetModelId()))
96 throw RException(R__FAIL("mismatch between entry and model"));
97
98 const std::size_t bytesWritten = entry.Append();
100 fNEntries++;
101
105 status.fShouldFlushCluster =
107 }
108 /// Fill an entry into this context. This method will perform a light check whether the entry comes from the
109 /// context's own model.
110 /// \return The number of uncompressed bytes written.
111 std::size_t Fill(ROOT::REntry &entry)
112 {
// NOTE(review): `status` is not declared in the visible lines. The embedded listing numbers jump from 112 to 114,
// so the declaration was presumably dropped by the extraction (likely a local ROOT::RNTupleFillStatus, which is
// the type FillNoFlush() takes) -- confirm against the original header.
114 FillNoFlush(entry, status);
// FillNoFlush() never flushes by itself; flush the cluster here when the fill status asks for it.
115 if (status.ShouldFlushCluster())
116 FlushCluster();
// Report the size that the status object recorded for the entry just filled.
117 return status.GetLastEntrySize();
118 }
119 /// Flush column data, preparing for CommitCluster or to reduce memory usage. This will trigger compression of pages,
120 /// but not actually write to storage.
121 void FlushColumns();
122 /// Flush the entries filled so far to storage
123 void FlushCluster();
124 /// Logically append staged clusters to the RNTuple.
126
/// Return a read-only reference to the model held by this context (fModel).
/// fModel is dereferenced without a null check.
127 const ROOT::RNTupleModel &GetModel() const { return *fModel; }
/// Create a new entry from this context's model by forwarding to RNTupleModel::CreateEntry().
/// \return The newly created entry; ownership is handed to the caller.
128 std::unique_ptr<ROOT::REntry> CreateEntry() { return fModel->CreateEntry(); }
129
130 /// Return the entry number that was last flushed in a cluster.
132 /// Return the number of entries filled so far.
134
/// Enable or disable staged cluster committing.
/// \param val Requested state; defaults to true (enable).
/// \throws RException if `val` is false while fStagedClusters is not empty.
135 void EnableStagedClusterCommitting(bool val = true)
136 {
// Refuse to switch staging off while staged clusters are still pending.
137 if (!val && !fStagedClusters.empty()) {
138 throw RException(R__FAIL("cannot disable staged committing with pending clusters"));
139 }
// NOTE(review): listing line 140 is missing from this extract; presumably it stores `val` in
// fStagedClusterCommitting -- confirm against the original header.
141 }
143
/// Return a read-only reference to this context's metrics object (fMetrics).
/// NOTE(review): the declaration of fMetrics is not visible in this extract.
145 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
146}; // class RNTupleFillContext
147
148} // namespace Experimental
149} // namespace ROOT
150
151#endif // ROOT7_RNTupleFillContext
#define R__unlikely(expr)
Definition RConfig.hxx:602
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
A collection of Counter objects with a name, a unit, and a description.
A context for filling entries (data) into clusters of an RNTuple.
ROOT::NTupleSize_t GetNEntries() const
Return the number of entries filled so far.
std::size_t fUnzippedClusterSize
Keeps track of the number of bytes written into the current cluster.
const ROOT::RNTupleModel & GetModel() const
void FillNoFlush(ROOT::REntry &entry, ROOT::RNTupleFillStatus &status)
Fill an entry into this context, but don't commit the cluster.
bool fStagedClusterCommitting
Whether to enable staged cluster committing, where only an explicit call to CommitStagedClusters() wi...
std::uint64_t fNBytesFilled
The total number of bytes filled into all the so far committed clusters, i.e.
RNTupleFillContext(const RNTupleFillContext &)=delete
std::unique_ptr< ROOT::RNTupleModel > fModel
Needs to be destructed before fSink.
void FlushCluster()
Flush so far filled entries to storage.
std::unique_ptr< ROOT::REntry > CreateEntry()
std::size_t fUnzippedClusterSizeEst
Estimator of uncompressed cluster size, taking into account the estimated compression ratio.
RNTupleFillContext & operator=(const RNTupleFillContext &)=delete
RNTupleFillContext(std::unique_ptr< ROOT::RNTupleModel > model, std::unique_ptr< Internal::RPageSink > sink)
std::vector< Internal::RPageSink::RStagedCluster > fStagedClusters
Vector of currently staged clusters.
std::size_t fMaxUnzippedClusterSize
Limit for committing cluster no matter the other tunables.
const Detail::RNTupleMetrics & GetMetrics() const
std::uint64_t fNBytesFlushed
The total number of bytes written to storage (i.e., after compression)
std::size_t Fill(ROOT::REntry &entry)
Fill an entry into this context.
ROOT::NTupleSize_t GetLastFlushed() const
Return the entry number that was last flushed in a cluster.
void CommitStagedClusters()
Logically append staged clusters to the RNTuple.
void FlushColumns()
Flush column data, preparing for CommitCluster or to reduce memory usage.
std::unique_ptr< Internal::RPageSink > fSink
A writer to fill an RNTuple from multiple contexts.
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:54
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
A status object after filling an entry.
ROOT::NTupleSize_t fNEntriesSinceLastFlush
Number of entries written into the current cluster.
std::size_t fUnzippedClusterSize
Number of bytes written into the current cluster.
std::size_t fLastEntrySize
Number of bytes written for the last entry.
bool ShouldFlushCluster() const
Return true if the caller should call FlushCluster.
std::size_t GetLastEntrySize() const
Return the number of bytes for the last entry.
The RNTupleModel encapsulates the schema of an ntuple.
An RNTuple that gets filled with entries (data) and writes them to storage.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.