Logo ROOT  
Reference Guide
RPageStorage.cxx
Go to the documentation of this file.
1/// \file RPageStorage.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RPageStorage.hxx>
18#include <ROOT/RColumn.hxx>
19#include <ROOT/RField.hxx>
20#include <ROOT/RNTupleModel.hxx>
21#include <ROOT/RPagePool.hxx>
23#include <ROOT/RStringView.hxx>
24
25#include <Compression.h>
26#include <TError.h>
27
28#include <unordered_map>
29#include <utility>
30
31
33{
34}
35
37{
38}
39
40
41//------------------------------------------------------------------------------
42
43
45 : RPageStorage(name), fOptions(options)
46{
47}
48
50{
51}
52
/// Factory for page sources: constructs an RPageSourceFile from the ntuple name, the location and the read
/// options, and hands it back through a unique_ptr to the RPageSource base class.
/// \param ntupleName forwarded unchanged to the RPageSourceFile constructor
/// \param location forwarded unchanged to the RPageSourceFile constructor
/// \param options forwarded unchanged to the RPageSourceFile constructor
/// \return owning pointer to the newly created page source
/// NOTE(review): `location` is not inspected here; an RPageSourceFile is created unconditionally.
53std::unique_ptr<ROOT::Experimental::Detail::RPageSource> ROOT::Experimental::Detail::RPageSource::Create(
54 std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options)
55{
56 return std::make_unique<RPageSourceFile>(ntupleName, location, options);
57}
58
61{
63 auto columnId = fDescriptor.FindColumnId(fieldId, column.GetIndex());
65 return ColumnHandle_t(columnId, &column);
66}
67
69{
70 return fDescriptor.GetNEntries();
71}
72
74{
75 return fDescriptor.GetNElements(columnHandle.fId);
76}
77
79{
80 // TODO(jblomer) distinguish trees
81 return columnHandle.fId;
82}
83
84
85//------------------------------------------------------------------------------
86
87
89 : RPageStorage(name), fOptions(options)
90{
91}
92
94{
95}
96
/// Factory for page sinks: constructs an RPageSinkFile from the ntuple name, the location and the write
/// options, and hands it back through a unique_ptr to the RPageSink base class.
/// \param ntupleName forwarded unchanged to the RPageSinkFile constructor
/// \param location forwarded unchanged to the RPageSinkFile constructor
/// \param options forwarded unchanged to the RPageSinkFile constructor
/// \return owning pointer to the newly created page sink
/// NOTE(review): `location` is not inspected here; an RPageSinkFile is created unconditionally.
97std::unique_ptr<ROOT::Experimental::Detail::RPageSink> ROOT::Experimental::Detail::RPageSink::Create(
98 std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options)
99{
100 return std::make_unique<RPageSinkFile>(ntupleName, location, options);
101}
102
105{
106 auto columnId = fLastColumnId++;
107 fDescriptorBuilder.AddColumn(columnId, fieldId, column.GetVersion(), column.GetModel(), column.GetIndex());
108 return ColumnHandle_t(columnId, &column);
109}
110
111
113{
114 fDescriptorBuilder.SetNTuple(fNTupleName, model.GetDescription(), "undefined author",
115 model.GetVersion(), model.GetUuid());
116
117 std::unordered_map<const RFieldBase *, DescriptorId_t> fieldPtr2Id; // necessary to find parent field ids
118 const auto &rootField = *model.GetRootField();
119 fDescriptorBuilder.AddField(fLastFieldId, rootField.GetFieldVersion(), rootField.GetTypeVersion(),
120 rootField.GetName(), rootField.GetType(), rootField.GetNRepetitions(), rootField.GetStructure());
121 fieldPtr2Id[&rootField] = fLastFieldId++;
122 for (auto& f : *model.GetRootField()) {
123 fDescriptorBuilder.AddField(fLastFieldId, f.GetFieldVersion(), f.GetTypeVersion(), f.GetName(), f.GetType(),
124 f.GetNRepetitions(), f.GetStructure());
125 fDescriptorBuilder.AddFieldLink(fieldPtr2Id[f.GetParent()], fLastFieldId);
126
127 Detail::RFieldFuse::Connect(fLastFieldId, *this, f); // issues in turn one or several calls to AddColumn()
128 fieldPtr2Id[&f] = fLastFieldId++;
129 }
130
131 auto nColumns = fLastColumnId;
132 for (DescriptorId_t i = 0; i < nColumns; ++i) {
134 columnRange.fColumnId = i;
135 columnRange.fFirstElementIndex = 0;
136 columnRange.fNElements = 0;
137 columnRange.fCompressionSettings = fOptions.GetCompression();
138 fOpenColumnRanges.emplace_back(columnRange);
140 pageRange.fColumnId = i;
141 fOpenPageRanges.emplace_back(std::move(pageRange));
142 }
143
144 CreateImpl(model);
145}
146
147
149{
150 auto locator = CommitPageImpl(columnHandle, page);
151
152 auto columnId = columnHandle.fId;
153 fOpenColumnRanges[columnId].fNElements += page.GetNElements();
155 pageInfo.fNElements = page.GetNElements();
156 pageInfo.fLocator = locator;
157 fOpenPageRanges[columnId].fPageInfos.emplace_back(pageInfo);
158}
159
160
162{
163 auto locator = CommitClusterImpl(nEntries);
164
165 R__ASSERT((nEntries - fPrevClusterNEntries) < ClusterSize_t(-1));
166 fDescriptorBuilder.AddCluster(fLastClusterId, RNTupleVersion(), fPrevClusterNEntries,
167 ClusterSize_t(nEntries - fPrevClusterNEntries));
168 fDescriptorBuilder.SetClusterLocator(fLastClusterId, locator);
169 for (auto &range : fOpenColumnRanges) {
170 fDescriptorBuilder.AddClusterColumnRange(fLastClusterId, range);
171 range.fFirstElementIndex += range.fNElements;
172 range.fNElements = 0;
173 }
174 for (auto &range : fOpenPageRanges) {
176 std::swap(fullRange, range);
177 range.fColumnId = fullRange.fColumnId;
178 fDescriptorBuilder.AddClusterPageRange(fLastClusterId, std::move(fullRange));
179 }
180 ++fLastClusterId;
181 fPrevClusterNEntries = nEntries;
182}
#define f(i)
Definition: RSha256.hxx:104
#define R__ASSERT(e)
Definition: TError.h:96
char name[80]
Definition: TGX11.cxx:109
const RColumnModel & GetModel() const
Definition: RColumn.hxx:231
std::uint32_t GetIndex() const
Definition: RColumn.hxx:232
RNTupleVersion GetVersion() const
Definition: RColumn.hxx:237
static void Connect(DescriptorId_t fieldId, RPageStorage &pageStorage, RFieldBase &field)
Definition: RField.cxx:115
RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options)
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
void CommitCluster(NTupleSize_t nEntries)
Finalize the current cluster and create a new one for the following data.
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final
Register a new column.
NTupleSize_t GetNElements(ColumnHandle_t columnHandle)
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RPageSource(std::string_view ntupleName, const RNTupleReadOptions &fOptions)
ColumnId_t GetColumnId(ColumnHandle_t columnHandle)
Common functionality of an ntuple storage for both reading and writing.
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
ClusterSize_t::ValueType GetNElements() const
Definition: RPage.hxx:83
The RNTupleModel encapsulates the schema of an ntuple.
std::string GetDescription() const
RFieldRoot * GetRootField() const
RNTupleVersion GetVersion() const
Common user-tunable settings for reading ntuples.
For forward and backward compatibility, attach version information to the constituents of the file f...
Common user-tunable settings for storing ntuples.
basic_string_view< char > string_view
void swap(RDirectoryEntry &e1, RDirectoryEntry &e2) noexcept
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:42
RClusterSize ClusterSize_t
Definition: RNTupleUtil.hxx:57
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:78
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:74
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:79
The window of element indexes of a particular column in a particular cluster.
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
NTupleSize_t fFirstElementIndex
A 64bit element index.
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
We do not need to store the element size / uncompressed page size because we know to which column the...
RLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Records the partition of data into pages for a particular column in a particular cluster.