33 , fNTupleName(ntupleName)
36 , fPrevClusterNEntries(0)
39 "Do not store real data with this version of RNTuple!";
44 , fNTupleName(ntupleName)
48 "Do not store real data with this version of RNTuple!";
56 if (fSettings.fTakeOwnership) {
57 fSettings.fFile->Close();
58 delete fSettings.fFile;
72 auto columnId = fNTupleHeader.fColumns.size();
73 fNTupleHeader.fColumns.emplace_back(columnHeader);
81 fNTupleHeader.fPageSize = kPageSize;
82 fDirectory = fSettings.fFile->mkdir(fNTupleName.c_str());
84 unsigned int nColumns = 0;
86 nColumns +=
f.GetNColumns();
88 fPagePool = std::make_unique<RPagePool>(fNTupleHeader.fPageSize, nColumns);
92 fieldHeader.
fName =
f.GetName();
93 fieldHeader.
fType =
f.GetType();
95 if (
f.GetParent()) fieldHeader.
fParentName =
f.GetParent()->GetName();
96 fNTupleHeader.fFields.emplace_back(fieldHeader);
98 f.ConnectColumns(
this);
100 R__ASSERT(nColumns == fNTupleHeader.fColumns.size());
102 fCurrentCluster.fPagesPerColumn.resize(nColumns);
103 fNTupleFooter.fNElementsPerColumn.resize(nColumns, 0);
109 auto columnId = columnHandle.
fId;
116 std::to_string(fCurrentCluster.fPagesPerColumn[columnId].fRangeStarts.size());
117 fDirectory->WriteObject(&pagePayload, key.c_str());
118 fCurrentCluster.fPagesPerColumn[columnId].fRangeStarts.push_back(page.
GetRangeFirst());
119 fNTupleFooter.fNElementsPerColumn[columnId] += page.
GetNElements();
124 fCurrentCluster.fNEntries = nEntries - fPrevClusterNEntries;
125 fPrevClusterNEntries = nEntries;
127 fDirectory->WriteObject(&fCurrentCluster, key.c_str());
128 fNTupleFooter.fNClusters++;
129 fNTupleFooter.fNEntries = nEntries;
131 for (
auto& pageInfo : fCurrentCluster.fPagesPerColumn) {
132 pageInfo.fRangeStarts.clear();
134 fCurrentCluster.fEntryRangeStart = fNTupleFooter.fNEntries;
150 , fNTupleName(ntupleName)
151 , fDirectory(nullptr)
152 , fSettings(settings)
158 , fNTupleName(ntupleName)
159 , fDirectory(nullptr)
169 if (fSettings.fTakeOwnership) {
170 fSettings.fFile->Close();
171 delete fSettings.fFile;
180 auto columnId = fMapper.fColumnName2Id[model.GetName()];
181 R__ASSERT(model == *fMapper.fId2ColumnModel[columnId]);
191 fDirectory = fSettings.fFile->GetDirectory(fNTupleName.c_str());
196 for (
auto &fieldHeader : ntupleHeader->fFields) {
197 if (fieldHeader.fParentName.empty()) {
202 auto nColumns = ntupleHeader->
fColumns.size();
203 fPagePool = std::make_unique<RPagePool>(ntupleHeader->fPageSize, nColumns);
204 fMapper.fColumnIndex.resize(nColumns);
206 std::int32_t columnId = 0;
207 for (
auto &columnHeader : ntupleHeader->fColumns) {
208 auto columnModel = std::make_unique<RColumnModel>(
209 columnHeader.fName, columnHeader.fType, columnHeader.fIsSorted);
210 fMapper.fId2ColumnModel[columnId] = std::move(columnModel);
211 fMapper.fColumnName2Id[columnHeader.fName] = columnId;
216 for (
auto &columnHeader : ntupleHeader->fColumns) {
217 if (columnHeader.fOffsetColumn.empty())
continue;
218 fMapper.fColumn2Pointee[fMapper.fColumnName2Id[columnHeader.fOffsetColumn]] =
219 fMapper.fColumnName2Id[columnHeader.fName];
226 for (std::int32_t iCluster = 0; iCluster < ntupleFooter->fNClusters; ++iCluster) {
229 R__ASSERT(clusterFooter->fPagesPerColumn.size() == nColumns);
230 for (
unsigned iColumn = 0; iColumn < nColumns; ++iColumn) {
231 if (clusterFooter->fPagesPerColumn[iColumn].fRangeStarts.empty())
233 NTupleSize_t selfClusterOffset = clusterFooter->fPagesPerColumn[iColumn].fRangeStarts[0];
235 auto itrPointee = fMapper.fColumn2Pointee.find(iColumn);
236 if (itrPointee != fMapper.fColumn2Pointee.end()) {
241 if (!clusterFooter->fPagesPerColumn[itrPointee->second].fRangeStarts.empty())
242 pointeeClusterOffset = clusterFooter->fPagesPerColumn[itrPointee->second].fRangeStarts[0];
245 for (
auto rangeStart : clusterFooter->fPagesPerColumn[iColumn].fRangeStarts) {
246 fMapper.fColumnIndex[iColumn].fRangeStarts.push_back(rangeStart);
247 fMapper.fColumnIndex[iColumn].fClusterId.push_back(iCluster);
248 fMapper.fColumnIndex[iColumn].fPageInCluster.push_back(pageInCluster);
249 fMapper.fColumnIndex[iColumn].fSelfClusterOffset.push_back(selfClusterOffset);
250 fMapper.fColumnIndex[iColumn].fPointeeClusterOffset.push_back(pointeeClusterOffset);
254 delete clusterFooter;
257 for (
unsigned iColumn = 0; iColumn < nColumns; ++iColumn) {
258 fMapper.fColumnIndex[iColumn].fNElements = ntupleFooter->fNElementsPerColumn[iColumn];
260 fMapper.fNEntries = ntupleFooter->fNEntries;
274 auto model = std::make_unique<RNTupleModel>();
275 for (
auto&
f : fMapper.fRootFields) {
277 model->AddField(std::unique_ptr<Detail::RFieldBase>(field));
285 auto columnId = columnHandle.
fId;
286 auto nElems = fMapper.fColumnIndex[columnId].fNElements;
293 std::size_t iLower = 0;
294 std::size_t iUpper = fMapper.fColumnIndex[columnId].fRangeStarts.size() - 1;
296 unsigned iLast = iUpper;
297 while (iLower <= iUpper) {
298 std::size_t iPivot = (iLower + iUpper) / 2;
299 NTupleSize_t pivot = fMapper.fColumnIndex[columnId].fRangeStarts[iPivot];
304 if (iPivot < iLast) next = fMapper.fColumnIndex[columnId].fRangeStarts[iPivot + 1];
305 if ((pivot == index) || (next > index)) {
306 firstOutsidePage = next;
316 auto elemsInPage = firstOutsidePage - firstInPage;
317 void* buf = page->
TryGrow(elemsInPage);
320 auto clusterId = fMapper.fColumnIndex[columnId].fClusterId[pageIdx];
321 auto pageInCluster = fMapper.fColumnIndex[columnId].fPageInCluster[pageIdx];
322 auto selfOffset = fMapper.fColumnIndex[columnId].fSelfClusterOffset[pageIdx];
323 auto pointeeOffset = fMapper.fColumnIndex[columnId].fPointeeClusterOffset[pageIdx];
331 std::to_string(pageInCluster);
332 auto pageKey = fDirectory->GetKey(keyName.c_str());
335 memcpy(page->
GetBuffer(), pagePayload->fContent, pagePayload->fSize);
337 free(pagePayload->fContent);
343 return fMapper.fNEntries;
348 return fMapper.fColumnIndex[columnHandle.
fId].fNElements;
354 return columnHandle.
fId;
#define R__WARNING_HERE(GROUP)
const RColumnModel & GetModel() const
RColumn * GetOffsetColumn() const
static RFieldBase * Create(const std::string &fieldName, const std::string &typeName)
Factory method to resurrect a field from the stored on-disk type information.
static constexpr const char * kKeyClusterFooter
static constexpr const char * kKeyPagePayload
static constexpr const char * kKeySeparator
static constexpr const char * kKeyNTupleFooter
static constexpr const char * kKeyNTupleHeader
void CommitDataset() final
Finalize the current cluster and the entire data set.
void CommitPage(ColumnHandle_t columnHandle, const RPage &page) final
Write a page to the storage. The column must have been added before.
RPageSinkRoot(std::string_view ntupleName, RSettings settings)
void CommitCluster(NTupleSize_t nEntries) final
Finalize the current cluster and create a new one for the following data.
void Create(RNTupleModel *model) final
Physically creates the storage container to hold the tree (e.g., a directory in a TFile or a S3 bucke...
ColumnHandle_t AddColumn(RColumn *column) final
Register a new column.
Abstract interface to write data into a tree.
RPageSourceRoot(std::string_view ntupleName, RSettings settings)
ColumnHandle_t AddColumn(RColumn *column) final
Register a new column.
virtual ~RPageSourceRoot()
std::unique_ptr< ROOT::Experimental::RNTupleModel > GenerateModel() final
void Attach() final
TODO: copy/assignment for creating clones in multiple threads.
void PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t index, RPage *page) final
Fills a page starting with index rangeStart; the corresponding column is taken from the page object.
ColumnId_t GetColumnId(ColumnHandle_t columnHandle) final
NTupleSize_t GetNEntries() final
NTupleSize_t GetNElements(ColumnHandle_t columnHandle) final
Abstract interface to read data from a tree.
Stores information about the cluster in which this page resides.
A page is a fixed size slice of a column that is mapped into memory.
std::size_t GetSize() const
The space taken by column elements in the buffer.
NTupleSize_t GetNElements() const
void SetWindow(const NTupleSize_t rangeFirst, const RClusterInfo &clusterInfo)
Seek the page to a certain position of the column.
NTupleSize_t GetRangeFirst() const
void * TryGrow(std::size_t nElements)
Return a pointer after the last element that has space for nElements new elements.
std::string GetName() const
EColumnType GetType() const
Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various hea...
const RNTupleDescriptor & GetDescriptor() const
void SetNTuple(std::string_view name, const RNTupleVersion &version)
The RNTupleModel encapsulates the schema of an ntuple.
RFieldRoot * GetRootField()
For forward and backward compatibility, attach version information to the constituents of the file f...
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose, Int_t netopt=0)
Create / open a file.
basic_string_view< char > string_view
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr NTupleSize_t kInvalidNTupleIndex
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Namespace for new ROOT classes and functions.
std::string fOffsetColumn