Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleModel.cxx
Go to the documentation of this file.
1/// \file RNTupleModel.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-15
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RError.hxx>
17#include <ROOT/RField.hxx>
18#include <ROOT/RNTupleModel.hxx>
20#include <ROOT/StringUtils.hxx>
21
22#include <atomic>
23#include <cstdlib>
24#include <memory>
25#include <utility>
26
namespace {
/// Hands out a fresh model ID on every call.
/// The counter is a function-local atomic that starts at zero and is pre-incremented, so the first ID returned
/// is 1 and zero is not handed out by this function.
std::uint64_t GetNewModelId()
{
   static std::atomic<std::uint64_t> gLastModelId = 0;
   return ++gLastModelId;
}
} // anonymous namespace
34
35//------------------------------------------------------------------------------
36
39 const FieldMap_t &fieldMap)
40{
41 auto source = fieldMap.at(target);
42 const bool hasCompatibleStructure =
43 (source->GetStructure() == target->GetStructure()) ||
44 ((source->GetStructure() == ENTupleStructure::kCollection) && dynamic_cast<const RCardinalityField *>(target));
45 if (!hasCompatibleStructure)
46 return R__FAIL("field mapping structural mismatch: " + source->GetFieldName() + " --> " + target->GetFieldName());
47 if ((source->GetStructure() == ENTupleStructure::kLeaf) || (source->GetStructure() == ENTupleStructure::kUnsplit)) {
48 if (target->GetTypeName() != source->GetTypeName())
49 return R__FAIL("field mapping type mismatch: " + source->GetFieldName() + " --> " + target->GetFieldName());
50 }
51
52 // We support projections only across records and collections. In the following, we check that the projected
53 // field is on the same path of collection fields in the field tree than the source field.
54
55 // Finds the first non-record parent field of the input field
56 auto fnBreakPoint = [](const RFieldBase *f) -> const RFieldBase * {
57 auto parent = f->GetParent();
58 while (parent) {
59 if (parent->GetStructure() != ENTupleStructure::kRecord)
60 return parent;
61 parent = parent->GetParent();
62 }
63 // We reached the zero field
64 return nullptr;
65 };
66
67 // If source or target has a variant or reference as a parent, error out
68 auto *sourceBreakPoint = fnBreakPoint(source);
69 if (sourceBreakPoint && sourceBreakPoint->GetStructure() != ENTupleStructure::kCollection)
70 return R__FAIL("unsupported field mapping (source structure)");
71 auto *targetBreakPoint = fnBreakPoint(target);
72 if (targetBreakPoint && sourceBreakPoint->GetStructure() != ENTupleStructure::kCollection)
73 return R__FAIL("unsupported field mapping (target structure)");
74
75 if (!sourceBreakPoint && !targetBreakPoint) {
76 // Source and target have no collections as parent
78 }
79 if (sourceBreakPoint && targetBreakPoint) {
80 if (sourceBreakPoint == targetBreakPoint) {
81 // Source and target are children of the same collection
83 }
84 if (auto it = fieldMap.find(targetBreakPoint); it != fieldMap.end() && it->second == sourceBreakPoint) {
85 // The parent collection of parent is mapped to the parent collection of the source
87 }
88 // Source and target are children of different collections
89 return R__FAIL("field mapping structure mismatch: " + source->GetFieldName() + " --> " + target->GetFieldName());
90 }
91
92 // Either source or target have no collection as a parent, but the other one has; that doesn't fit
93 return R__FAIL("field mapping structure mismatch: " + source->GetFieldName() + " --> " + target->GetFieldName());
94}
95
97ROOT::Experimental::RNTupleModel::RProjectedFields::Add(std::unique_ptr<RFieldBase> field, const FieldMap_t &fieldMap)
98{
99 auto result = EnsureValidMapping(field.get(), fieldMap);
100 if (!result)
101 return R__FORWARD_ERROR(result);
102 for (const auto &f : *field) {
103 result = EnsureValidMapping(&f, fieldMap);
104 if (!result)
105 return R__FORWARD_ERROR(result);
106 }
107
108 fFieldMap.insert(fieldMap.begin(), fieldMap.end());
109 fFieldZero->Attach(std::move(field));
110 return RResult<void>::Success();
111}
112
115{
116 if (auto it = fFieldMap.find(target); it != fFieldMap.end())
117 return it->second;
118 return nullptr;
119}
120
121std::unique_ptr<ROOT::Experimental::RNTupleModel::RProjectedFields>
123{
124 auto cloneFieldZero = std::unique_ptr<RFieldZero>(static_cast<RFieldZero *>(fFieldZero->Clone("").release()));
125 auto clone = std::unique_ptr<RProjectedFields>(new RProjectedFields(std::move(cloneFieldZero)));
126 clone->fModel = newModel;
127 // TODO(jblomer): improve quadratic search to re-wire the field mappings given the new model and the cloned
128 // projected fields. Not too critical as we generally expect a limited number of projected fields
129 for (const auto &[k, v] : fFieldMap) {
130 for (const auto &f : *clone->GetFieldZero()) {
131 if (f.GetQualifiedFieldName() == k->GetQualifiedFieldName()) {
132 clone->fFieldMap[&f] = clone->fModel->FindField(v->GetQualifiedFieldName());
133 break;
134 }
135 }
136 }
137 return clone;
138}
139
// Constructor initializer list and empty body. NOTE(review): the signature line is absent from this listing.
// Stores `writer` in fWriter and initializes fOpenChangeset from fWriter.GetUpdatableModel().
141 : fWriter(writer), fOpenChangeset(fWriter.GetUpdatableModel())
142{
143}
144
146{
147 fOpenChangeset.fModel.Unfreeze();
148 // We set the model ID to zero until CommitUpdate(). That prevents calls to RNTupleWriter::Fill() in the middle
149 // of updates
150 std::swap(fOpenChangeset.fModel.fModelId, fNewModelId);
151}
152
154{
155 fOpenChangeset.fModel.Freeze();
156 std::swap(fOpenChangeset.fModel.fModelId, fNewModelId);
157 if (fOpenChangeset.IsEmpty())
158 return;
159 Internal::RNTupleModelChangeset toCommit{fOpenChangeset.fModel};
160 std::swap(fOpenChangeset.fAddedFields, toCommit.fAddedFields);
161 std::swap(fOpenChangeset.fAddedProjectedFields, toCommit.fAddedProjectedFields);
162 fWriter.GetSink().UpdateSchema(toCommit, fWriter.GetNEntries());
163}
164
165void ROOT::Experimental::RNTupleModel::RUpdater::AddField(std::unique_ptr<RFieldBase> field)
166{
167 auto fieldp = field.get();
168 fOpenChangeset.fModel.AddField(std::move(field));
169 fOpenChangeset.fAddedFields.emplace_back(fieldp);
170}
171
174 FieldMappingFunc_t mapping)
175{
176 auto fieldp = field.get();
177 auto result = fOpenChangeset.fModel.AddProjectedField(std::move(field), mapping);
178 if (result)
179 fOpenChangeset.fAddedProjectedFields.emplace_back(fieldp);
181}
182
184{
185 RResult<void> nameValid = RFieldBase::EnsureValidFieldName(fieldName);
186 if (!nameValid) {
187 nameValid.Throw();
188 }
189 auto fieldNameStr = std::string(fieldName);
190 if (fFieldNames.count(fieldNameStr) > 0)
191 throw RException(R__FAIL("field name '" + fieldNameStr + "' already exists in NTuple model"));
192}
193
195{
196 if (IsFrozen())
197 throw RException(R__FAIL("invalid attempt to modify frozen model"));
198}
199
201{
202 if (IsBare())
203 throw RException(R__FAIL("invalid attempt to use default entry of bare model"));
204}
205
206ROOT::Experimental::RNTupleModel::RNTupleModel(std::unique_ptr<RFieldZero> fieldZero)
207 : fFieldZero(std::move(fieldZero)), fModelId(GetNewModelId()), fSchemaId(fModelId)
208{}
209
210std::unique_ptr<ROOT::Experimental::RNTupleModel> ROOT::Experimental::RNTupleModel::CreateBare()
211{
212 return CreateBare(std::make_unique<RFieldZero>());
213}
214
215std::unique_ptr<ROOT::Experimental::RNTupleModel>
216ROOT::Experimental::RNTupleModel::CreateBare(std::unique_ptr<RFieldZero> fieldZero)
217{
218 auto model = std::unique_ptr<RNTupleModel>(new RNTupleModel(std::move(fieldZero)));
219 model->fProjectedFields = std::make_unique<RProjectedFields>(model.get());
220 return model;
221}
222
223std::unique_ptr<ROOT::Experimental::RNTupleModel> ROOT::Experimental::RNTupleModel::Create()
224{
225 return Create(std::make_unique<RFieldZero>());
226}
227
228std::unique_ptr<ROOT::Experimental::RNTupleModel>
229ROOT::Experimental::RNTupleModel::Create(std::unique_ptr<RFieldZero> fieldZero)
230{
231 auto model = CreateBare(std::move(fieldZero));
232 model->fDefaultEntry = std::unique_ptr<REntry>(new REntry(model->fModelId, model->fSchemaId));
233 return model;
234}
235
236std::unique_ptr<ROOT::Experimental::RNTupleModel> ROOT::Experimental::RNTupleModel::Clone() const
237{
238 auto cloneModel = std::unique_ptr<RNTupleModel>(
239 new RNTupleModel(std::unique_ptr<RFieldZero>(static_cast<RFieldZero *>(fFieldZero->Clone("").release()))));
240 cloneModel->fModelId = GetNewModelId();
241 // For a frozen model, we can keep the schema id because adding new fields is forbidden. It is reset in Unfreeze()
242 // if called by the user.
243 if (fIsFrozen) {
244 cloneModel->fSchemaId = fSchemaId;
245 } else {
246 cloneModel->fSchemaId = cloneModel->fModelId;
247 }
248 cloneModel->fIsFrozen = fIsFrozen;
249 cloneModel->fFieldNames = fFieldNames;
250 cloneModel->fDescription = fDescription;
251 cloneModel->fProjectedFields = fProjectedFields->Clone(cloneModel.get());
252 if (fDefaultEntry) {
253 cloneModel->fDefaultEntry = std::unique_ptr<REntry>(new REntry(cloneModel->fModelId, cloneModel->fSchemaId));
254 for (const auto &f : cloneModel->fFieldZero->GetSubFields()) {
255 cloneModel->fDefaultEntry->AddValue(f->CreateValue());
256 }
257 }
258 return cloneModel;
259}
260
262{
263 if (fieldName.empty())
264 return nullptr;
265
266 auto *field = static_cast<ROOT::Experimental::RFieldBase *>(fFieldZero.get());
267 for (auto subfieldName : ROOT::Split(fieldName, ".")) {
268 const auto subfields = field->GetSubFields();
269 auto it = std::find_if(subfields.begin(), subfields.end(),
270 [&](const auto *f) { return f->GetFieldName() == subfieldName; });
271 if (it != subfields.end()) {
272 field = *it;
273 } else {
274 field = nullptr;
275 break;
276 }
277 }
278
279 return field;
280}
281
282void ROOT::Experimental::RNTupleModel::AddField(std::unique_ptr<RFieldBase> field)
283{
284 EnsureNotFrozen();
285 if (!field)
286 throw RException(R__FAIL("null field"));
287 EnsureValidFieldName(field->GetFieldName());
288
289 if (fDefaultEntry)
290 fDefaultEntry->AddValue(field->CreateValue());
291 fFieldNames.insert(field->GetFieldName());
292 fFieldZero->Attach(std::move(field));
293}
294
297{
298 EnsureNotFrozen();
299 if (!field)
300 return R__FAIL("null field");
301 auto fieldName = field->GetFieldName();
302
304 auto sourceField = FindField(mapping(fieldName));
305 if (!sourceField)
306 return R__FAIL("no such field: " + mapping(fieldName));
307 fieldMap[field.get()] = sourceField;
308 for (const auto &subField : *field) {
309 sourceField = FindField(mapping(subField.GetQualifiedFieldName()));
310 if (!sourceField)
311 return R__FAIL("no such field: " + mapping(fieldName));
312 fieldMap[&subField] = sourceField;
313 }
314
315 EnsureValidFieldName(fieldName);
316 auto result = fProjectedFields->Add(std::move(field), fieldMap);
317 if (!result) {
318 return R__FORWARD_ERROR(result);
319 }
320 fFieldNames.insert(fieldName);
321 return RResult<void>::Success();
322}
323
325{
326 if (!IsFrozen())
327 throw RException(R__FAIL("invalid attempt to get mutable zero field of unfrozen model"));
328 return *fFieldZero;
329}
330
332{
333 auto f = FindField(fieldName);
334 if (!f)
335 throw RException(R__FAIL("invalid field: " + std::string(fieldName)));
336
337 return *f;
338}
339
// Returns the object fDefaultEntry points to. EnsureNotBare() throws an RException for a bare model before
// the pointer is dereferenced. NOTE(review): the signature line is absent from this listing.
341{
342 EnsureNotBare();
343 return *fDefaultEntry;
344}
345
// Returns the object fDefaultEntry points to; unlike the accessor right above, this one also requires a frozen
// model. Throws an RException if the model is not frozen or (via EnsureNotBare()) if it is bare.
// NOTE(review): the signature line is absent from this listing.
347{
348 if (!IsFrozen())
349 throw RException(R__FAIL("invalid attempt to get default entry of unfrozen model"));
350 EnsureNotBare();
351 return *fDefaultEntry;
352}
353
354std::unique_ptr<ROOT::Experimental::REntry> ROOT::Experimental::RNTupleModel::CreateEntry() const
355{
356 if (!IsFrozen())
357 throw RException(R__FAIL("invalid attempt to create entry of unfrozen model"));
358
359 auto entry = std::unique_ptr<REntry>(new REntry(fModelId, fSchemaId));
360 for (const auto &f : fFieldZero->GetSubFields()) {
361 entry->AddValue(f->CreateValue());
362 }
363 return entry;
364}
365
366std::unique_ptr<ROOT::Experimental::REntry> ROOT::Experimental::RNTupleModel::CreateBareEntry() const
367{
368 if (!IsFrozen())
369 throw RException(R__FAIL("invalid attempt to create entry of unfrozen model"));
370
371 auto entry = std::unique_ptr<REntry>(new REntry(fModelId, fSchemaId));
372 for (const auto &f : fFieldZero->GetSubFields()) {
373 entry->AddValue(f->BindValue(nullptr));
374 }
375 return entry;
376}
377
379{
380 const auto &topLevelFields = fFieldZero->GetSubFields();
381 auto it = std::find_if(topLevelFields.begin(), topLevelFields.end(),
382 [&fieldName](const RFieldBase *f) { return f->GetFieldName() == fieldName; });
383
384 if (it == topLevelFields.end()) {
385 throw RException(R__FAIL("invalid field name: " + std::string(fieldName)));
386 }
387 return REntry::RFieldToken(std::distance(topLevelFields.begin(), it), fSchemaId);
388}
389
391{
392 if (!IsFrozen())
393 throw RException(R__FAIL("invalid attempt to create bulk of unfrozen model"));
394
395 auto f = FindField(fieldName);
396 if (!f)
397 throw RException(R__FAIL("no such field: " + std::string(fieldName)));
398 return f->CreateBulk();
399}
400
// Clears the frozen flag; does nothing if the model is not frozen. A previously frozen model gets a new model
// ID, which also becomes its schema ID; both are copied into the default entry if there is one.
// NOTE(review): the signature line is absent from this listing.
402{
403 if (!IsFrozen())
404 return;
405
406 fModelId = GetNewModelId();
407 fSchemaId = fModelId;
408 if (fDefaultEntry) {
409 fDefaultEntry->fModelId = fModelId;
410 fDefaultEntry->fSchemaId = fSchemaId;
411 }
412 fIsFrozen = false;
413}
414
// Sets the frozen flag. NOTE(review): the signature line is absent from this listing.
416{
417 fIsFrozen = true;
418}
419
420void ROOT::Experimental::RNTupleModel::SetDescription(std::string_view description)
421{
422 EnsureNotFrozen();
423 fDescription = std::string(description);
424}
425
// Estimates the memory (in bytes) used for this model during writing with the given write options.
// NOTE(review): the signature line and the listing lines 438 and 449 are absent from this listing, so the
// statement started at 437 is incomplete and the term announced by the comment at 448 is not visible.
427{
428 std::size_t bytes = 0;
429 std::size_t minPageBufferSize = 0;
430
431 // Start with the size of the page buffers used to fill a persistent sink
432 std::size_t nColumns = 0;
// Count the columns over all column representatives of every field below the zero field
433 for (auto &&field : *fFieldZero) {
434 for (const auto &r : field.GetColumnRepresentatives()) {
435 nColumns += r.size();
436 for (auto columnType : r) {
437 minPageBufferSize +=
439 }
440 }
441 }
// The page buffer estimate is capped by the page buffer budget from the options
442 bytes = std::min(options.GetPageBufferBudget(), nColumns * options.GetMaxUnzippedPageSize());
443
444 // If using buffered writing with RPageSinkBuf, we create a clone of the model and keep at least
445 // the compressed pages in memory.
446 if (options.GetUseBufferedWrite()) {
447 bytes += minPageBufferSize;
448 // Use the target cluster size as an estimate for all compressed pages combined.
450 int compression = options.GetCompression();
451 if (compression != 0 && options.GetUseImplicitMT() == RNTupleWriteOptions::EImplicitMT::kDefault) {
452 // With IMT, compression happens asynchronously which means that the uncompressed pages also stay around. Use a
453 // compression factor of 2x as a very rough estimate.
454 bytes += 2 * options.GetApproxZippedClusterSize();
455 }
456 }
457
458 return bytes;
459}
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:294
#define R__FORWARD_RESULT(res)
Short-hand to return an RResult<T> value from a subroutine to the calling stack frame.
Definition RError.hxx:292
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
#define f(i)
Definition RSha256.hxx:104
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t target
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t bytes
static std::unique_ptr< RColumnElementBase > Generate(EColumnType type)
If CppT == void, use the default C++ type for the given column type.
An artificial field that transforms an RNTuple column that contains the offset of collections into co...
Definition RField.hxx:276
The field token identifies a top-level field in this entry.
Definition REntry.hxx:59
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:50
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Similar to RValue but manages an array of consecutive values.
A field translates read and write calls from/to underlying columns to/from tree values.
const RFieldBase * GetParent() const
std::vector< RFieldBase * > GetSubFields()
Definition RField.cxx:1017
static RResult< void > EnsureValidFieldName(std::string_view fieldName)
Check whether a given string is a valid field name.
Definition RField.cxx:910
The container field for an ntuple model, which itself has no physical representation.
Definition RField.hxx:58
Projected fields are fields whose columns are reused from existing fields.
RResult< void > EnsureValidMapping(const RFieldBase *target, const FieldMap_t &fieldMap)
Asserts that the passed field is a valid target of the source field provided in the field map.
std::unordered_map< const RFieldBase *, const RFieldBase * > FieldMap_t
The map keys are the projected target fields, the map values are the backing source fields Note that ...
const RFieldBase * GetSourceField(const RFieldBase *target) const
RResult< void > Add(std::unique_ptr< RFieldBase > field, const FieldMap_t &fieldMap)
Adds a new projected field.
std::unique_ptr< RProjectedFields > Clone(const RNTupleModel *newModel) const
The new model needs to be a clone of fModel.
void CommitUpdate()
Commit changes since the last call to BeginUpdate().
void BeginUpdate()
Begin a new set of alterations to the underlying model.
RResult< void > AddProjectedField(std::unique_ptr< RFieldBase > field, FieldMappingFunc_t mapping)
void AddField(std::unique_ptr< RFieldBase > field)
The RNTupleModel encapulates the schema of an ntuple.
std::unordered_set< std::string > fFieldNames
Keeps track of which field names are taken, including projected field names.
void EnsureValidFieldName(std::string_view fieldName)
Checks that user-provided field names are valid in the context of this NTuple model.
std::uint64_t fModelId
Every model has a unique ID to distinguish it from other models.
std::function< std::string(const std::string &)> FieldMappingFunc_t
User provided function that describes the mapping of existing source fields to projected fields in te...
std::uint64_t fSchemaId
Models have a separate schema ID to remember that the clone of a frozen model still has the same sche...
REntry::RFieldToken GetToken(std::string_view fieldName) const
Creates a token to be used in REntry methods to address a top-level field.
void EnsureNotBare() const
Throws an RException if fDefaultEntry is nullptr.
std::unique_ptr< RNTupleModel > Clone() const
void EnsureNotFrozen() const
Throws an RException if fFrozen is true.
std::size_t EstimateWriteMemoryUsage(const RNTupleWriteOptions &options=RNTupleWriteOptions()) const
Estimate the memory usage for this model during writing.
const RFieldBase & GetField(std::string_view fieldName) const
std::unique_ptr< REntry > CreateBareEntry() const
In a bare entry, all values point to nullptr.
std::unique_ptr< REntry > CreateEntry() const
RFieldBase::RBulk CreateBulk(std::string_view fieldName) const
Calls the given field's CreateBulk() method. Throws an exception if no field with the given name exis...
static std::unique_ptr< RNTupleModel > Create()
RResult< void > AddProjectedField(std::unique_ptr< RFieldBase > field, FieldMappingFunc_t mapping)
Adds a top-level field based on existing fields.
void SetDescription(std::string_view description)
RFieldBase * FindField(std::string_view fieldName) const
The field name can be a top-level field or a nested field. Returns nullptr if the field is not in the...
RNTupleModel(std::unique_ptr< RFieldZero > fieldZero)
static std::unique_ptr< RNTupleModel > CreateBare()
A bare model has no default entry.
void AddField(std::unique_ptr< RFieldBase > field)
Adds a field whose type is not known at compile time.
RFieldZero & GetFieldZero()
Non-const access to the root field is used to commit clusters during writing, and to make adjustments...
std::unique_ptr< RFieldZero > fFieldZero
Hierarchy of fields consisting of simple types and collections (sub trees)
Common user-tunable settings for storing ntuples.
An RNTuple that gets filled with entries (data) and writes them to storage.
void Throw()
Throws an RException with fError.
Definition RError.cxx:67
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
std::vector< std::string > Split(std::string_view str, std::string_view delims, bool skipEmpty=false)
Splits a string at each character in delims.
The incremental changes to a RNTupleModel