Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleModel.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleModel.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleModel
17#define ROOT7_RNTupleModel
18
19#include <ROOT/REntry.hxx>
20#include <ROOT/RError.hxx>
21#include <ROOT/RField.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <string_view>
24
25#include <cstdint>
26#include <functional>
27#include <memory>
28#include <string>
29#include <unordered_map>
30#include <unordered_set>
31#include <utility>
32
33namespace ROOT {
34namespace Experimental {
35
36class RNTupleModel;
37class RNTupleWriter;
38class RNTupleWriteOptions;
39
40namespace Internal {
41class RProjectedFields;
42
43RFieldZero &GetFieldZeroOfModel(RNTupleModel &model);
44RProjectedFields &GetProjectedFieldsOfModel(RNTupleModel &model);
45
46// clang-format off
47/**
48\class ROOT::Experimental::Internal::RProjectedFields
49\ingroup NTuple
50\brief The projected fields of a `RNTupleModel`
51
52Projected fields are fields whose columns are reused from existing fields. Projected fields are not attached
53to the model's zero field. Only the real source fields are written to; projected fields are stored as meta-data
54(header) information only. Only top-level projected fields are supported because otherwise the layout of types
55could be altered in unexpected ways.
56All projected fields and the source fields used to back them are kept in this class.
57*/
58// clang-format on
60public:
61 /// The map keys are the projected target fields, the map values are the backing source fields
62 /// Note that sub fields are treated individually and independently of their parent field
63 using FieldMap_t = std::unordered_map<const RFieldBase *, const RFieldBase *>;
64
65private:
66 explicit RProjectedFields(std::unique_ptr<RFieldZero> fieldZero) : fFieldZero(std::move(fieldZero)) {}
67 /// The projected fields are attached to this zero field
68 std::unique_ptr<RFieldZero> fFieldZero;
69 /// Maps the target projected fields attached to fFieldZero to their backing source fields from fModel
71 /// The model this set of projected fields belongs to
73
74 /// Asserts that the passed field is a valid target of the source field provided in the field map.
75 /// Checks the field without looking into sub fields.
77
78public:
79 explicit RProjectedFields(const RNTupleModel &model) : fFieldZero(std::make_unique<RFieldZero>()), fModel(&model) {}
84 ~RProjectedFields() = default;
85
86 /// The new model needs to be a clone of fModel
87 std::unique_ptr<RProjectedFields> Clone(const RNTupleModel &newModel) const;
88
90 const RFieldBase *GetSourceField(const RFieldBase *target) const;
91 /// Adds a new projected field. The field map needs to provide valid source fields of fModel for 'field'
92 /// and each of its sub fields.
93 RResult<void> Add(std::unique_ptr<RFieldBase> field, const FieldMap_t &fieldMap);
94 bool IsEmpty() const { return fFieldZero->begin() == fFieldZero->end(); }
95};
96
97// clang-format off
98/**
99\class ROOT::Experimental::Internal::RNTupleModelChangeset
100\ingroup NTuple
101\brief The incremental changes to a `RNTupleModel`
102
103Represents a set of alterations to a `RNTupleModel` that happened after the model was used to initialize a `RPageSink`
104instance. This object can be used to communicate metadata updates to a `RPageSink`.
105You will not normally use this directly; see `RNTupleModel::RUpdater` instead.
106*/
107// clang-format on
110 /// Points to the fields in fModel that were added as part of an updater transaction
111 std::vector<RFieldBase *> fAddedFields;
112 /// Points to the projected fields in fModel that were added as part of an updater transaction
113 std::vector<RFieldBase *> fAddedProjectedFields;
114
116 bool IsEmpty() const { return fAddedFields.empty() && fAddedProjectedFields.empty(); }
117};
118
119} // namespace Internal
120
121// clang-format off
122/**
123\class ROOT::Experimental::RNTupleModel
124\ingroup NTuple
125\brief The RNTupleModel encapsulates the schema of an ntuple.
126
127The ntuple model comprises a collection of hierarchically organized fields. From a model, "entries"
128can be extracted. For convenience, the model provides a default entry unless it is created as a "bare model".
129Models have a unique model identifier that facilitates checking whether entries are compatible with it
130(i.e.: have been extracted from that model).
131
132A model is subject to a state transition during its lifetime: it starts in a building state, in which fields can be
133added and modified. Once the schema is finalized, the model gets frozen. Only frozen models can create entries.
134From frozen, models move into an expired state. In this state, the model is only partially usable: it can be cloned
135and queried, but it can't be unfrozen anymore and no new entries can be created. This state is used for models
136that were used for writing and are no longer connected to a page sink.
137*/
138// clang-format on
142
143public:
144 /// User provided function that describes the mapping of existing source fields to projected fields in terms
145 /// of fully qualified field names. The mapping function is called with the qualified field names of the provided
146 /// field and the subfields. It should return the qualified field names used as a mapping source.
147 using FieldMappingFunc_t = std::function<std::string(const std::string &)>;
148
149 /// A wrapper over a field name and an optional description; used in `MakeField()` and `RUpdater::MakeField()`
152 NameWithDescription_t(const std::string &name) : fName(name) {}
153 NameWithDescription_t(std::string_view name) : fName(name) {}
154 NameWithDescription_t(std::string_view name, std::string_view descr) : fName(name), fDescription(descr) {}
155
156 std::string_view fName;
157 std::string_view fDescription = "";
158 };
159
160 /// A model is usually immutable after passing it to an `RNTupleWriter`. However, for the rare
161 /// cases that require changing the model after the fact, `RUpdater` provides limited support for
162 /// incremental updates, e.g. addition of new fields.
163 ///
164 /// See `RNTupleWriter::CreateModelUpdater()` for an example.
165 class RUpdater {
166 private:
169 std::uint64_t fNewModelId = 0; ///< The model ID after committing
170
171 public:
172 explicit RUpdater(RNTupleWriter &writer);
174 /// Begin a new set of alterations to the underlying model. As a side effect, all `REntry` instances related to
175 /// the model are invalidated.
176 void BeginUpdate();
177 /// Commit changes since the last call to `BeginUpdate()`. All the invalidated `REntry`s remain invalid.
178 /// `CreateEntry()` or `CreateBareEntry()` can be used to create an `REntry` that matches the new model.
179 /// Upon completion, `BeginUpdate()` can be called again to begin a new set of changes.
180 void CommitUpdate();
181
182 template <typename T>
184 {
187 auto it = std::find_if(fieldZero->begin(), fieldZero->end(),
188 [&](const auto &f) { return f.GetFieldName() == fieldNameDesc.fName; });
189 R__ASSERT(it != fieldZero->end());
190 fOpenChangeset.fAddedFields.emplace_back(&(*it));
191 return objPtr;
192 }
193
194 void AddField(std::unique_ptr<RFieldBase> field);
195
196 RResult<void> AddProjectedField(std::unique_ptr<RFieldBase> field, FieldMappingFunc_t mapping);
197 };
198
199private:
200 // The states a model can be in. Possible transitions are between kBuilding and kFrozen
201 // and from kFrozen to kExpired.
202 enum class EState {
203 kBuilding,
204 kFrozen,
206 };
207
208 /// Hierarchy of fields consisting of simple types and collections (sub trees)
209 std::unique_ptr<RFieldZero> fFieldZero;
210 /// Contains field values corresponding to the created top-level fields, as well as registered subfields
211 std::unique_ptr<REntry> fDefaultEntry;
212 /// Keeps track of which field names are taken, including projected field names.
213 std::unordered_set<std::string> fFieldNames;
214 /// Free text set by the user
215 std::string fDescription;
216 /// The set of projected top-level fields
217 std::unique_ptr<Internal::RProjectedFields> fProjectedFields;
218 /// Keeps track of which subfields have been registered to be included in entries belonging to this model.
219 std::unordered_set<std::string> fRegisteredSubfields;
220 /// Every model has a unique ID to distinguish it from other models. Entries are linked to models via the ID.
221 /// Cloned models get a new model ID. Expired models are cloned into frozen models.
222 std::uint64_t fModelId = 0;
223 /// Models have a separate schema ID to remember that the clone of a frozen model still has the same schema.
224 std::uint64_t fSchemaId = 0;
225 /// Changed by Freeze() / Unfreeze() and by the RUpdater.
227
228 /// Checks that user-provided field names are valid in the context of this RNTuple model.
229 /// Throws an RException for invalid names, empty names (which is reserved for the zero field) and duplicate field
230 /// names.
231 void EnsureValidFieldName(std::string_view fieldName);
232
233 /// Throws an RException if the model is frozen (see fModelState)
234 void EnsureNotFrozen() const;
235
236 /// Throws an RException if fDefaultEntry is nullptr
237 void EnsureNotBare() const;
238
239 /// The field name can be a top-level field or a nested field. Returns nullptr if the field is not in the model.
240 RFieldBase *FindField(std::string_view fieldName) const;
241
242 /// Add a subfield to the provided entry. If `initializeValue` is false, a nullptr will be bound to the entry value
243 /// (used in bare models).
244 void AddSubfield(std::string_view fieldName, REntry &entry, bool initializeValue = true) const;
245
246 RNTupleModel(std::unique_ptr<RFieldZero> fieldZero);
247
248public:
249 RNTupleModel(const RNTupleModel&) = delete;
251 ~RNTupleModel() = default;
252
253 std::unique_ptr<RNTupleModel> Clone() const;
254 static std::unique_ptr<RNTupleModel> Create();
255 static std::unique_ptr<RNTupleModel> Create(std::unique_ptr<RFieldZero> fieldZero);
256 /// A bare model has no default entry
257 static std::unique_ptr<RNTupleModel> CreateBare();
258 static std::unique_ptr<RNTupleModel> CreateBare(std::unique_ptr<RFieldZero> fieldZero);
259
260 /// Creates a new field given a `name` or `{name, description}` pair and a
261 /// corresponding, default-constructed value that is managed by a shared pointer.
262 ///
263 /// **Example: create some fields and fill an %RNTuple**
264 /// ~~~ {.cpp}
265 /// #include <ROOT/RNTupleModel.hxx>
266 /// #include <ROOT/RNTupleWriter.hxx>
267 /// using ROOT::Experimental::RNTupleModel;
268 /// using ROOT::Experimental::RNTupleWriter;
269 ///
270 /// #include <vector>
271 ///
272 /// auto model = RNTupleModel::Create();
273 /// auto pt = model->MakeField<float>("pt");
274 /// auto vec = model->MakeField<std::vector<int>>("vec");
275 ///
276 /// // The RNTuple is written to disk when the RNTupleWriter goes out of scope
277 /// {
278 /// auto writer = RNTupleWriter::Recreate(std::move(model), "myNTuple", "myFile.root");
279 /// for (int i = 0; i < 100; i++) {
280 /// *pt = static_cast<float>(i);
281 /// *vec = {i, i+1, i+2};
282 /// writer->Fill();
283 /// }
284 /// }
285 /// ~~~
286 ///
287 /// **Example: create a field with a description**
288 /// ~~~ {.cpp}
289 /// #include <ROOT/RNTupleModel.hxx>
290 /// using ROOT::Experimental::RNTupleModel;
291 ///
292 /// auto model = RNTupleModel::Create();
293 /// auto hadronFlavour = model->MakeField<float>({
294 /// "hadronFlavour", "flavour from hadron ghost clustering"
295 /// });
296 /// ~~~
297 template <typename T>
299 {
302 auto field = std::make_unique<RField<T>>(fieldNameDesc.fName);
303 field->SetDescription(fieldNameDesc.fDescription);
304 std::shared_ptr<T> ptr;
305 if (fDefaultEntry)
306 ptr = fDefaultEntry->AddValue<T>(*field);
307 fFieldNames.insert(field->GetFieldName());
308 fFieldZero->Attach(std::move(field));
309 return ptr;
310 }
311
312 /// Adds a field whose type is not known at compile time. Thus there is no shared pointer returned.
313 ///
314 /// Throws an exception if the field is null.
315 void AddField(std::unique_ptr<RFieldBase> field);
316
317 /// Register a subfield so it can be accessed directly from entries belonging to the model. Because registering a
318 /// subfield does not fundamentally change the model, previously created entries will not be invalidated, nor
319 /// modified in any way; a registered subfield is merely an accessor added to the default entry (if present) and any
320 /// entries created afterwards.
321 ///
322 /// Using models with registered subfields for writing is not allowed. Attempting to do so will result in an
323 /// exception.
324 ///
325 /// Throws an exception if the provided subfield could not be found in the model.
326 void RegisterSubfield(std::string_view qualifiedFieldName);
327
328 /// Adds a top-level field based on existing fields.
329 ///
330 /// The mapping function takes one argument, which is a string containing the name of the projected field. The return
331 /// value of the mapping function should be the name of the (existing) field onto which the projection is made.
332 /// **Example**
333 /// ~~~ {.cpp}
334 /// auto model = RNTupleModel::Create();
335 /// model->MakeField<float>("met");
336 /// auto metProjection = RFieldBase::Create("missingE", "float").Unwrap();
337 /// model->AddProjectedField(std::move(metProjection), [](const std::string &) { return "met"; });
338 /// ~~~
339 ///
340 /// Adding projections for collection fields is also possible, as long as they follow the same schema structure. For
341 /// example, a projection of a collection of structs onto a collection of scalars is possible, but a projection of a
342 /// collection of a collection of scalars onto a collection of scalars is not.
343 ///
344 /// In the case of projections for nested fields, the mapping function must provide a mapping for every nesting
345 /// level.
346 /// **Example**
347 /// ~~~ {.cpp}
348 /// struct P { int x, y; };
349 ///
350 /// auto model = RNTupleModel::Create();
351 /// model->MakeField<std::vector<P>>("points");
352 /// auto pxProjection = RFieldBase::Create("pxs", "std::vector<int>").Unwrap();
353 /// model->AddProjectedField(std::move(pxProjection), [](const std::string &fieldName) {
354 /// if (fieldName == "pxs")
355 /// return "points";
356 /// else
357 /// return "points._0.x";
358 /// });
359 /// ~~~
360 ///
361 /// Creating projections for fields containing `std::variant` or fixed-size arrays is unsupported.
362 RResult<void> AddProjectedField(std::unique_ptr<RFieldBase> field, FieldMappingFunc_t mapping);
363
364 void Freeze();
365 void Unfreeze();
366 void Expire();
367 bool IsExpired() const { return fModelState == EState::kExpired; }
369 bool IsBare() const { return !fDefaultEntry; }
370 std::uint64_t GetModelId() const { return fModelId; }
371 std::uint64_t GetSchemaId() const { return fSchemaId; }
372
373 std::unique_ptr<REntry> CreateEntry() const;
374 /// In a bare entry, all values point to nullptr. The resulting entry shall use BindValue() in order
375 /// to set memory addresses to be serialized / deserialized
376 std::unique_ptr<REntry> CreateBareEntry() const;
377 /// Creates a token to be used in REntry methods to address a field present in the entry
378 REntry::RFieldToken GetToken(std::string_view fieldName) const;
379 /// Calls the given field's CreateBulk() method. Throws an exception if no field with the given name exists.
380 RFieldBase::RBulk CreateBulk(std::string_view fieldName) const;
381
383 const REntry &GetDefaultEntry() const;
384
385 /// Mutable access to the root field is used to make adjustments to the fields.
387 const RFieldZero &GetConstFieldZero() const { return *fFieldZero; }
388 RFieldBase &GetMutableField(std::string_view fieldName);
389 const RFieldBase &GetConstField(std::string_view fieldName) const;
390
391 const std::string &GetDescription() const { return fDescription; }
392 void SetDescription(std::string_view description);
393
394 /// Get the (qualified) names of subfields that have been registered to be included in entries from this model.
395 const std::unordered_set<std::string> &GetRegisteredSubfields() const { return fRegisteredSubfields; }
396
397 /// Estimate the memory usage for this model during writing
398 ///
399 /// This will return an estimate in bytes for the internal page and compression buffers. The value should be
400 /// understood per sequential RNTupleWriter or per RNTupleFillContext created for a RNTupleParallelWriter
401 /// constructed with this model.
402 std::size_t EstimateWriteMemoryUsage(const RNTupleWriteOptions &options = RNTupleWriteOptions()) const;
403};
404
405} // namespace Experimental
406} // namespace ROOT
407
408#endif
#define f(i)
Definition RSha256.hxx:104
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t target
char name[80]
Definition TGX11.cxx:110
The projected fields of a RNTupleModel
RResult< void > EnsureValidMapping(const RFieldBase *target, const FieldMap_t &fieldMap)
Asserts that the passed field is a valid target of the source field provided in the field map.
FieldMap_t fFieldMap
Maps the source fields from fModel to the target projected fields attached to fFieldZero.
const RNTupleModel * fModel
The model this set of projected fields belongs to.
std::unique_ptr< RProjectedFields > Clone(const RNTupleModel &newModel) const
The new model needs to be a clone of fModel.
RProjectedFields(const RProjectedFields &)=delete
RProjectedFields & operator=(RProjectedFields &&)=default
std::unordered_map< const RFieldBase *, const RFieldBase * > FieldMap_t
The map keys are the projected target fields, the map values are the backing source fields Note that ...
RResult< void > Add(std::unique_ptr< RFieldBase > field, const FieldMap_t &fieldMap)
Adds a new projected field.
const RFieldBase * GetSourceField(const RFieldBase *target) const
RProjectedFields(std::unique_ptr< RFieldZero > fieldZero)
RProjectedFields(RProjectedFields &&)=default
RProjectedFields & operator=(const RProjectedFields &)=delete
std::unique_ptr< RFieldZero > fFieldZero
The projected fields are attached to this zero field.
The field token identifies a (sub)field in this entry.
Definition REntry.hxx:63
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:51
Similar to RValue but manages an array of consecutive values.
A field translates read and write calls from/to underlying columns to/from tree values.
The container field for an ntuple model, which itself has no physical representation.
Definition RField.hxx:58
A model is usually immutable after passing it to an RNTupleWriter.
RResult< void > AddProjectedField(std::unique_ptr< RFieldBase > field, FieldMappingFunc_t mapping)
std::shared_ptr< T > MakeField(const NameWithDescription_t &fieldNameDesc)
Internal::RNTupleModelChangeset fOpenChangeset
void CommitUpdate()
Commit changes since the last call to BeginUpdate().
void BeginUpdate()
Begin a new set of alterations to the underlying model.
std::uint64_t fNewModelId
The model ID after committing.
void AddField(std::unique_ptr< RFieldBase > field)
The RNTupleModel encapsulates the schema of an ntuple.
std::unordered_set< std::string > fRegisteredSubfields
Keeps track of which subfields have been registered to be included in entries belonging to this model...
EState fModelState
Changed by Freeze() / Unfreeze() and by the RUpdater.
std::unordered_set< std::string > fFieldNames
Keeps track of which field names are taken, including projected field names.
std::string fDescription
Free text set by the user.
void EnsureValidFieldName(std::string_view fieldName)
Checks that user-provided field names are valid in the context of this RNTuple model.
std::uint64_t fModelId
Every model has a unique ID to distinguish it from other models.
std::function< std::string(const std::string &)> FieldMappingFunc_t
User provided function that describes the mapping of existing source fields to projected fields in te...
std::unique_ptr< Internal::RProjectedFields > fProjectedFields
The set of projected top-level fields.
std::uint64_t GetModelId() const
const RFieldZero & GetConstFieldZero() const
const RFieldBase & GetConstField(std::string_view fieldName) const
RNTupleModel(const RNTupleModel &)=delete
std::uint64_t fSchemaId
Models have a separate schema ID to remember that the clone of a frozen model still has the same sche...
REntry::RFieldToken GetToken(std::string_view fieldName) const
Creates a token to be used in REntry methods to address a field present in the entry.
void EnsureNotBare() const
Throws an RException if fDefaultEntry is nullptr.
std::unique_ptr< RNTupleModel > Clone() const
void EnsureNotFrozen() const
Throws an RException if the model is frozen (see fModelState).
RFieldZero & GetMutableFieldZero()
Mutable access to the root field is used to make adjustments to the fields.
std::size_t EstimateWriteMemoryUsage(const RNTupleWriteOptions &options=RNTupleWriteOptions()) const
Estimate the memory usage for this model during writing.
std::shared_ptr< T > MakeField(const NameWithDescription_t &fieldNameDesc)
Creates a new field given a name or {name, description} pair and a corresponding, default-constructed...
const std::unordered_set< std::string > & GetRegisteredSubfields() const
Get the (qualified) names of subfields that have been registered to be included in entries from this ...
std::unique_ptr< REntry > CreateBareEntry() const
In a bare entry, all values point to nullptr.
std::unique_ptr< REntry > CreateEntry() const
RFieldBase::RBulk CreateBulk(std::string_view fieldName) const
Calls the given field's CreateBulk() method. Throws an exception if no field with the given name exis...
static std::unique_ptr< RNTupleModel > Create()
std::uint64_t GetSchemaId() const
void AddSubfield(std::string_view fieldName, REntry &entry, bool initializeValue=true) const
Add a subfield to the provided entry.
void SetDescription(std::string_view description)
std::unique_ptr< REntry > fDefaultEntry
Contains field values corresponding to the created top-level fields, as well as registered subfields.
RFieldBase * FindField(std::string_view fieldName) const
The field name can be a top-level field or a nested field. Returns nullptr if the field is not in the...
RResult< void > AddProjectedField(std::unique_ptr< RFieldBase > field, FieldMappingFunc_t mapping)
Adds a top-level field based on existing fields.
RNTupleModel(std::unique_ptr< RFieldZero > fieldZero)
RFieldBase & GetMutableField(std::string_view fieldName)
static std::unique_ptr< RNTupleModel > CreateBare()
A bare model has no default entry.
const std::string & GetDescription() const
void AddField(std::unique_ptr< RFieldBase > field)
Adds a field whose type is not known at compile time.
void RegisterSubfield(std::string_view qualifiedFieldName)
Register a subfield so it can be accessed directly from entries belonging to the model.
RNTupleModel & operator=(const RNTupleModel &)=delete
std::unique_ptr< RFieldZero > fFieldZero
Hierarchy of fields consisting of simple types and collections (sub trees)
Common user-tunable settings for storing ntuples.
An RNTuple that gets filled with entries (data) and writes them to storage.
const_iterator begin() const
const_iterator end() const
RProjectedFields & GetProjectedFieldsOfModel(RNTupleModel &model)
RFieldZero & GetFieldZeroOfModel(RNTupleModel &model)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The incremental changes to a RNTupleModel
std::vector< RFieldBase * > fAddedProjectedFields
Points to the projected fields in fModel that were added as part of an updater transaction.
std::vector< RFieldBase * > fAddedFields
Points to the fields in fModel that were added as part of an updater transaction.
A wrapper over a field name and an optional description; used in MakeField() and RUpdater::MakeField()
NameWithDescription_t(std::string_view name, std::string_view descr)