Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RNTupleProcessor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleProcessor.hxx
2/// \ingroup NTuple
3/// \author Florine de Geus <florine.de.geus@cern.ch>
4/// \date 2024-03-26
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT_RNTupleProcessor
17#define ROOT_RNTupleProcessor
18
19#include <ROOT/REntry.hxx>
20#include <ROOT/RError.hxx>
21#include <ROOT/RFieldToken.hxx>
24#include <ROOT/RNTupleModel.hxx>
25#include <ROOT/RNTupleUtil.hxx>
26#include <ROOT/RPageStorage.hxx>
27
28#include <memory>
29#include <string>
30#include <string_view>
31#include <vector>
32
33namespace ROOT {
34namespace Experimental {
35
36namespace Internal {
37struct RNTupleProcessorEntryLoader;
38} // namespace Internal
39
40// clang-format off
41/**
42\class ROOT::Experimental::RNTupleOpenSpec
43\ingroup NTuple
44\brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
45
46An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the
47TDirectory (or any of its subclasses) that contains the RNTuple.
48
49Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python.
50*/
51// clang-format on
53 friend class RNTupleProcessor;
56
57private:
58 std::string fNTupleName;
59 std::variant<std::string, TDirectory *> fStorage;
60
61public:
62 RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {}
63 RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {}
64
65 std::unique_ptr<ROOT::Internal::RPageSource> CreatePageSource() const;
66};
67
68// clang-format off
69/**
70\class ROOT::Experimental::RNTupleProcessor
71\ingroup NTuple
72\brief Interface for iterating over entries of RNTuples and vertically concatenated RNTuples (chains).
73
74Example usage (see ntpl012_processor.C for a full example):
75
76~~~{.cpp}
77#include <ROOT/RNTupleProcessor.hxx>
78using ROOT::Experimental::RNTupleProcessor;
79using ROOT::Experimental::RNTupleOpenSpec;
80
81std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
82auto processor = RNTupleProcessor::CreateChain(ntuples);
83
84for (const auto &entry : *processor) {
85 std::cout << "pt = " << *entry.GetPtr<float>("pt") << std::endl;
86}
87~~~
88
89An RNTupleProcessor is created by providing one or more RNTupleOpenSpecs, each of which contains the name and storage
90location of a single RNTuple. The RNTuples are processed in the order in which they were provided.
91
92The RNTupleProcessor constructor also (optionally) accepts an RNTupleModel, which determines which fields should be
93read. If no model is provided, a default model based on the descriptor of the first specified RNTuple will be used.
94If a field that was present in the first RNTuple is not found in a subsequent one, an error will be thrown.
95
96The RNTupleProcessor provides an iterator which gives access to the REntry containing the field data for the current
97entry. Additional bookkeeping information can be obtained through the RNTupleProcessor itself.
98*/
99// clang-format on
105
106protected:
107 // clang-format off
108 /**
109 \class ROOT::Experimental::RNTupleProcessor::RFieldContext
110 \ingroup NTuple
111 \brief Manager for a field as part of the RNTupleProcessor.
112
113 An RFieldContext contains two fields: a proto-field which is not connected to any page source but serves as the
114 blueprint for this particular field, and a concrete field that is connected to the page source currently connected
115 to the RNTupleProcessor for reading. When a new page source is connected, the current concrete field gets reset. A
116 new concrete field that is connected to this new page source is subsequently created from the proto-field.
117 */
118 // clang-format on
120 friend class RNTupleProcessor;
124
125 private:
126 std::unique_ptr<ROOT::RFieldBase> fProtoField;
127 std::unique_ptr<ROOT::RFieldBase> fConcreteField;
129 // Which RNTuple the field belongs to, in case the field belongs to an auxiliary RNTuple, according to the order
130 // in which it was specified. For chained RNTuples, this value will always be 0.
131 std::size_t fNTupleIdx;
132
133 public:
134 RFieldContext(std::unique_ptr<ROOT::RFieldBase> protoField, ROOT::RFieldToken token, std::size_t ntupleIdx = 0)
136 {
137 }
138
139 const ROOT::RFieldBase &GetProtoField() const { return *fProtoField; }
140 /// Concrete fields need to be reset explicitly before the page source they belong to is destroyed.
142 void SetConcreteField() { fConcreteField = fProtoField->Clone(fProtoField->GetFieldName()); }
143 bool IsAuxiliary() const { return fNTupleIdx > 0; }
144 };
145
146 std::string fProcessorName;
147 std::vector<RNTupleOpenSpec> fNTuples;
148 std::unique_ptr<ROOT::REntry> fEntry;
149 std::unique_ptr<ROOT::Internal::RPageSource> fPageSource;
150 /// Maps the (qualified) field name to its corresponding field context.
151 std::unordered_map<std::string, RFieldContext> fFieldContexts;
152
153 std::unique_ptr<ROOT::RNTupleModel> fModel;
154
155 /// Total number of entries. Only to be used internally by the processor, not meant to be exposed in the public
156 /// interface.
158
159 ROOT::NTupleSize_t fNEntriesProcessed = 0; ///< Total number of entries processed so far
160 ROOT::NTupleSize_t fCurrentEntryNumber = 0; ///< Current processor entry number
161 std::size_t fCurrentProcessorNumber = 0; ///< Number of the currently open inner processor
162
163 /////////////////////////////////////////////////////////////////////////////
164 /// \brief Create and connect a concrete field to the current page source, based on its proto field.
166
167 /////////////////////////////////////////////////////////////////////////////
168 /// \brief Load the entry identified by the provided entry number.
169 ///
170 /// \param[in] entryNumber Entry number to load
171 ///
172 /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise.
174
175 /////////////////////////////////////////////////////////////////////////////
176 /// \brief Point the entry's field values of the processor to the pointers from the provided entry.
177 ///
178 /// \param[in] entry The entry whose field values to use.
179 virtual void SetEntryPointers(const ROOT::REntry &entry) = 0;
180
181 /////////////////////////////////////////////////////////////////////////////
182 /// \brief Get the total number of entries in this processor
184
185 /////////////////////////////////////////////////////////////////////////////
186 /// \brief Create a new base RNTupleProcessor.
187 ///
188 /// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for
189 /// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary
190 /// RNTuple for RNTupleJoinProcessor.
191 /// \param[in] model The RNTupleModel representing the entries returned by the processor.
192 ///
193 /// \note Before processing, a model *must* exist. However, this is handled downstream by the RNTupleProcessor's
194 /// factory functions (CreateSingle, CreateChain and CreateJoin) and constructors.
195 RNTupleProcessor(std::string_view processorName, std::unique_ptr<ROOT::RNTupleModel> model)
196 : fProcessorName(processorName), fModel(std::move(model))
197 {
198 }
199
200public:
205 virtual ~RNTupleProcessor() = default;
206
207 /////////////////////////////////////////////////////////////////////////////
208 /// \brief Get the total number of entries processed so far.
210
211 /////////////////////////////////////////////////////////////////////////////
212 /// \brief Get the entry number that is currently being processed.
214
215 /////////////////////////////////////////////////////////////////////////////
216 /// \brief Get the number of the inner processor currently being read.
217 ///
218 /// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned.
220
221 /////////////////////////////////////////////////////////////////////////////
222 /// \brief Get the name of the processor.
223 ///
224 /// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying
225 /// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the
226 /// primary RNTuple for RNTupleJoinProcessor.
227 const std::string &GetProcessorName() const { return fProcessorName; }
228
229 /////////////////////////////////////////////////////////////////////////////
230 /// \brief Get the model used by the processor.
231 const ROOT::RNTupleModel &GetModel() const { return *fModel; }
232
233 /////////////////////////////////////////////////////////////////////////////
234 /// \brief Get a reference to the entry used by the processor.
235 ///
236 /// \return A reference to the entry used by the processor.
237 const ROOT::REntry &GetEntry() const { return *fEntry; }
238
239 // clang-format off
240 /**
241 \class ROOT::Experimental::RNTupleProcessor::RIterator
242 \ingroup NTuple
243 \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof.
244 */
245 // clang-format on
246 class RIterator {
247 private:
250
251 public:
252 using iterator_category = std::forward_iterator_tag;
255 using difference_type = std::ptrdiff_t;
257 using reference = const ROOT::REntry &;
258
261 {
262 // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already
263 // know there is nothing to load.
266 }
267 }
268
274
276 {
277 auto obj = *this;
278 ++(*this);
279 return obj;
280 }
281
283
284 friend bool operator!=(const iterator &lh, const iterator &rh)
285 {
286 return lh.fCurrentEntryNumber != rh.fCurrentEntryNumber;
287 }
288 friend bool operator==(const iterator &lh, const iterator &rh)
289 {
290 return lh.fCurrentEntryNumber == rh.fCurrentEntryNumber;
291 }
292 };
293
294 RIterator begin() { return RIterator(*this, 0); }
296
297 /////////////////////////////////////////////////////////////////////////////
298 /// \brief Create an RNTupleProcessor for a single RNTuple.
299 ///
300 /// \param[in] ntuple The name and storage location of the RNTuple to process.
301 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
302 /// one will be created based on the descriptor of the specified RNTuple.
303 ///
304 /// \return A pointer to the newly created RNTupleProcessor.
305 static std::unique_ptr<RNTupleProcessor>
306 Create(RNTupleOpenSpec ntuple, std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
307
308 /////////////////////////////////////////////////////////////////////////////
309 /// \brief Create an RNTupleProcessor for a single RNTuple.
310 ///
311 /// \param[in] ntuple The name and storage location of the RNTuple to process.
312 /// \param[in] processorName The name to give to the processor. Use
313 /// Create(RNTupleOpenSpec, std::unique_ptr<RNTupleModel>) to automatically use the name of the input RNTuple
314 /// instead.
315 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
316 /// one will be created based on the descriptor of the specified RNTuple.
317 ///
318 /// \return A pointer to the newly created RNTupleProcessor.
319 static std::unique_ptr<RNTupleProcessor>
320 Create(RNTupleOpenSpec ntuple, std::string_view processorName, std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
321
322 /////////////////////////////////////////////////////////////////////////////
323 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
324 ///
325 /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process.
326 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
327 /// one will be created based on the descriptor of the first RNTuple specified.
328 ///
329 /// \return A pointer to the newly created RNTupleProcessor.
330 static std::unique_ptr<RNTupleProcessor>
331 CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
332
333 /////////////////////////////////////////////////////////////////////////////
334 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
335 ///
336 /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process.
337 /// \param[in] processorName The name to give to the processor. Use
338 /// CreateChain(std::vector<RNTupleOpenSpec>, std::unique_ptr<RNTupleModel>) to automatically use the name of the first
339 /// input RNTuple instead.
340 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
341 /// one will be created based on the descriptor of the first RNTuple specified.
342 ///
343 /// \return A pointer to the newly created RNTupleProcessor.
344 static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<RNTupleOpenSpec> ntuples,
345 std::string_view processorName,
346 std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
347
348 /////////////////////////////////////////////////////////////////////////////
349 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors.
350 ///
351 /// \param[in] innerProcessors A list with the processors to chain.
352 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
353 /// one will be created based on the model used by the first inner processor.
354 ///
355 /// \return A pointer to the newly created RNTupleProcessor.
356 static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors,
357 std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
358
359 /////////////////////////////////////////////////////////////////////////////
360 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors.
361 ///
362 /// \param[in] innerProcessors A list with the processors to chain.
363 /// \param[in] processorName The name to give to the processor. Use
364 /// CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>>, std::unique_ptr<RNTupleModel>) to automatically use
365 /// the name of the first inner processor instead.
366 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
367 /// one will be created based on the model used by the first inner processor.
368 ///
369 /// \return A pointer to the newly created RNTupleProcessor.
370 static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors,
371 std::string_view processorName,
372 std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
373
374 /////////////////////////////////////////////////////////////////////////////
375 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
376 ///
377 /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential
378 /// order.
379 /// \param[in] auxNTuples The names and locations of the RNTuples to join the primary RNTuple with. The order in
380 /// which their entries are processed is determined by the primary RNTuple and doesn't necessarily have to be
381 /// sequential.
382 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
383 /// The join is made based on the combined join field values, and therefore each field has to be present in each
384 /// specified RNTuple. If an empty list is provided, it is assumed that the specified RNTuples are fully aligned.
385 /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple can be read by the
386 /// processor. If no model is provided, one will be created based on the descriptor of the primary RNTuple.
387 /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary RNTuple
388 /// (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the models will
389 /// be created based on the descriptors of their corresponding RNTuples. This also applies to individual auxiliary
390 /// RNTuples for which the provided model is a `nullptr`.
391 ///
392 /// \return A pointer to the newly created RNTupleProcessor.
393 static std::unique_ptr<RNTupleProcessor>
394 CreateJoin(const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples,
395 const std::vector<std::string> &joinFields, std::unique_ptr<ROOT::RNTupleModel> primaryModel = nullptr,
396 std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels = {});
397
398 /////////////////////////////////////////////////////////////////////////////
399 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
400 ///
401 /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential
402 /// order.
403 /// \param[in] auxNTuples The names and locations of the RNTuples to join the primary RNTuple with. The order in
404 /// which their entries are processed is determined by the primary RNTuple and doesn't necessarily have to be
405 /// sequential.
406 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
407 /// The join is made based on the combined join field values, and therefore each field has to be present in each
408 /// specified RNTuple. If an empty list is provided, it is assumed that the specified RNTuples are fully aligned.
409 /// \param[in] processorName The name to give to the processor. Use
410 /// CreateJoin(const RNTupleOpenSpec &, const std::vector<RNTupleOpenSpec> &, const std::vector<std::string> &,
411 /// std::unique_ptr<RNTupleModel>, std::vector<std::unique_ptr<RNTupleModel>>) to automatically use the name of the
412 /// primary RNTuple instead.
413 /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple
414 /// can be read by the processor. If no model is provided, one will be created based on the descriptor of the primary
415 /// RNTuple.
416 /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary
417 /// RNTuple (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the
418 /// models will be created based on the descriptors of their corresponding RNTuples. This also applies to individual
419 /// auxiliary RNTuples for which the provided model is a `nullptr`.
420 ///
421 /// \return A pointer to the newly created RNTupleProcessor.
422 static std::unique_ptr<RNTupleProcessor>
423 CreateJoin(const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples,
424 const std::vector<std::string> &joinFields, std::string_view processorName,
425 std::unique_ptr<ROOT::RNTupleModel> primaryModel = nullptr,
426 std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels = {});
427};
428
429// clang-format off
430/**
431\class ROOT::Experimental::RNTupleSingleProcessor
432\ingroup NTuple
433\brief Processor specialization for processing a single RNTuple.
434*/
435// clang-format on
437 friend class RNTupleProcessor;
438
439private:
441
442 /////////////////////////////////////////////////////////////////////////////
443 /// \brief Connect the page source of the underlying RNTuple.
444 void Connect();
445
446 /////////////////////////////////////////////////////////////////////////////
447 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
448 /// processor).
449 ///
450 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
452
453 /////////////////////////////////////////////////////////////////////////////
454 /// \sa ROOT::Experimental::RNTupleProcessor::SetEntryPointers.
455 void SetEntryPointers(const ROOT::REntry &entry) final;
456
457 /////////////////////////////////////////////////////////////////////////////
458 /// \brief Get the total number of entries in this processor.
460 {
461 Connect();
462 return fNEntries;
463 }
464
465 /////////////////////////////////////////////////////////////////////////////
466 /// \brief Construct a new RNTupleProcessor for processing a single RNTuple.
467 ///
468 /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process.
469 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is
470 /// the name of the underlying RNTuple.
471 /// \param[in] model The model that specifies which fields should be read by the processor.
473 std::unique_ptr<ROOT::RNTupleModel> model);
474};
475
476// clang-format off
477/**
478\class ROOT::Experimental::RNTupleChainProcessor
479\ingroup NTuple
480\brief Processor specialization for vertically combined (*chained*) RNTupleProcessors.
481*/
482// clang-format on
484 friend class RNTupleProcessor;
485
486private:
487 std::vector<std::unique_ptr<RNTupleProcessor>> fInnerProcessors;
488 std::vector<ROOT::NTupleSize_t> fInnerNEntries;
489
490 /////////////////////////////////////////////////////////////////////////////
491 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
492 /// processor).
493 ///
494 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
496
497 /////////////////////////////////////////////////////////////////////////////
498 /// \sa ROOT::Experimental::RNTupleProcessor::SetEntryPointers.
499 void SetEntryPointers(const ROOT::REntry &) final;
500
501 /////////////////////////////////////////////////////////////////////////////
502 /// \brief Get the total number of entries in this processor.
503 ///
504 /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly!
506
507 /////////////////////////////////////////////////////////////////////////////
508 /// \brief Construct a new RNTupleChainProcessor.
509 ///
510 /// \param[in] processors The inner processors to chain.
511 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this
512 /// is the name of the first inner processor.
513 /// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by
514 /// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is
515 /// specified, it is created from the descriptor of the first RNTuple specified in `ntuples`.
516 ///
517 /// RNTuples are processed in the order in which they are specified.
518 RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName,
519 std::unique_ptr<ROOT::RNTupleModel> model);
520};
521
522// clang-format off
523/**
524\class ROOT::Experimental::RNTupleJoinProcessor
525\ingroup NTuple
526\brief Processor specialization for horizontally combined (*joined*) RNTuples.
527*/
528// clang-format on
530 friend class RNTupleProcessor;
531
532private:
533 std::vector<std::unique_ptr<ROOT::Internal::RPageSource>> fAuxiliaryPageSources;
534 /// Tokens representing the join fields present in the main RNTuple
535 std::vector<ROOT::RFieldToken> fJoinFieldTokens;
536 std::vector<std::unique_ptr<Internal::RNTupleJoinTable>> fJoinTables;
537 bool fJoinTablesAreBuilt = false;
538
539 bool HasJoinTable() const { return fJoinTables.size() > 0; }
540
541 /////////////////////////////////////////////////////////////////////////////
542 /// \brief Load the entry identified by the provided entry number of the primary RNTuple.
543 ///
544 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
546
547 /////////////////////////////////////////////////////////////////////////////
548 /// \sa ROOT::Experimental::RNTupleProcessor::SetEntryPointers.
549 void SetEntryPointers(const ROOT::REntry &) final;
550
551 /////////////////////////////////////////////////////////////////////////////
552 /// \brief Get the total number of entries in this processor.
554
555 /////////////////////////////////////////////////////////////////////////////
556 /// \brief Set fModel by combining the primary and auxiliary models.
557 ///
558 /// \param[in] primaryModel The model of the primary RNTuple.
559 /// \param[in] auxModels Models of the auxiliary RNTuples.
560 ///
561 /// To prevent field name clashes when one or more models have fields with duplicate names, fields from each
562 /// auxiliary model are stored as an anonymous record, and subsequently registered as subfields in the join model.
563 /// This way, they can be accessed from the processor's entry as `auxNTupleName.fieldName`.
564 void SetModel(std::unique_ptr<ROOT::RNTupleModel> primaryModel,
565 std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels);
566
567 /////////////////////////////////////////////////////////////////////////////
568 /// \brief Connect all fields, once the primary and all auxiliary RNTuples have been added.
569 void ConnectFields();
570
571 /////////////////////////////////////////////////////////////////////////////
572 /// \brief Populate fJoinFieldTokens with tokens for join fields belonging to the main RNTuple in the join model.
573 ///
574 /// \param[in] joinFields The names of the fields used in the join.
575 void SetJoinFieldTokens(const std::vector<std::string> &joinFields)
576 {
577 fJoinFieldTokens.reserve(joinFields.size());
578 for (const auto &fieldName : joinFields) {
579 fJoinFieldTokens.emplace_back(fEntry->GetToken(fieldName));
580 }
581 }
582
583 /////////////////////////////////////////////////////////////////////////////
584 /// \brief Construct a new RNTupleJoinProcessor.
585 ///
586 /// \param[in] mainNTuple The source specification (name and storage location) of the primary RNTuple.
587 /// \param[in] auxNTuples The source specifications (name and storage location) of the auxiliary RNTuples.
588 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
589 /// The join is made based on the combined join field values, and therefore each field has to be present in each
590 /// specified RNTuple. If an empty list is provided, it is assumed that the RNTuples are fully aligned.
591 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this
592 /// is the name of the main RNTuple.
593 /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple can be read by the
594 /// processor. If no model is provided, one will be created based on the descriptor of the primary RNTuple.
595 /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary RNTuple
596 /// (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the models will
597 /// be created based on the descriptors of their corresponding RNTuples. This also applies to individual auxiliary
598 /// RNTuples for which the provided model is a `nullptr`.
599 RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples,
600 const std::vector<std::string> &joinFields, std::string_view processorName,
601 std::unique_ptr<ROOT::RNTupleModel> primaryModel = nullptr,
602 std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels = {});
603
604public:
610 {
611 for (auto &[_, fieldContext] : fFieldContexts) {
612 fieldContext.ResetConcreteField();
613 }
614 }
615};
616
617} // namespace Experimental
618} // namespace ROOT
619
620#endif // ROOT_RNTupleProcessor
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define _(A, B)
Definition cfortran.h:108
Processor specialization for vertically combined (chained) RNTupleProcessors.
void SetEntryPointers(const ROOT::REntry &) final
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
std::vector< ROOT::NTupleSize_t > fInnerNEntries
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in th...
std::vector< std::unique_ptr< RNTupleProcessor > > fInnerProcessors
Processor specialization for horizontally combined (joined) RNTuples.
RNTupleJoinProcessor(const RNTupleJoinProcessor &)=delete
std::vector< std::unique_ptr< ROOT::Internal::RPageSource > > fAuxiliaryPageSources
RNTupleJoinProcessor(RNTupleJoinProcessor &&)=delete
void SetJoinFieldTokens(const std::vector< std::string > &joinFields)
Populate fJoinFieldTokens with tokens for join fields belonging to the main RNTuple in the join model...
RNTupleJoinProcessor operator=(const RNTupleJoinProcessor &)=delete
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
RNTupleJoinProcessor operator=(RNTupleJoinProcessor &&)=delete
std::vector< ROOT::RFieldToken > fJoinFieldTokens
Tokens representing the join fields present in the main RNTuple.
std::vector< std::unique_ptr< Internal::RNTupleJoinTable > > fJoinTables
Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
RNTupleOpenSpec(std::string_view n, const std::string &s)
std::variant< std::string, TDirectory * > fStorage
RNTupleOpenSpec(std::string_view n, TDirectory *s)
std::unique_ptr< ROOT::Internal::RPageSource > CreatePageSource() const
Manager for a field as part of the RNTupleProcessor.
RFieldContext(std::unique_ptr< ROOT::RFieldBase > protoField, ROOT::RFieldToken token, std::size_t ntupleIdx=0)
void ResetConcreteField()
Concrete pages need to be reset explicitly before the page source they belong to is destroyed.
std::unique_ptr< ROOT::RFieldBase > fConcreteField
Iterator over the entries of an RNTuple, or vertical concatenation thereof.
friend bool operator==(const iterator &lh, const iterator &rh)
friend bool operator!=(const iterator &lh, const iterator &rh)
RIterator(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber)
Interface for iterating over entries of RNTuples and vertically concatenated RNTuples (chains).
const std::string & GetProcessorName() const
Get the name of the processor.
std::unique_ptr< ROOT::REntry > fEntry
virtual ROOT::NTupleSize_t GetNEntries()=0
Get the total number of entries in this processor.
std::unique_ptr< ROOT::RNTupleModel > fModel
static std::unique_ptr< RNTupleProcessor > Create(RNTupleOpenSpec ntuple, std::unique_ptr< ROOT::RNTupleModel > model=nullptr)
Create an RNTupleProcessor for a single RNTuple.
ROOT::NTupleSize_t fNEntries
Total number of entries.
friend struct ROOT::Experimental::Internal::RNTupleProcessorEntryLoader
static std::unique_ptr< RNTupleProcessor > CreateJoin(const RNTupleOpenSpec &primaryNTuple, const std::vector< RNTupleOpenSpec > &auxNTuples, const std::vector< std::string > &joinFields, std::unique_ptr< ROOT::RNTupleModel > primaryModel=nullptr, std::vector< std::unique_ptr< ROOT::RNTupleModel > > auxModels={})
Create an RNTupleProcessor for a join (i.e., a horizontal combination) of RNTuples.
std::unique_ptr< ROOT::Internal::RPageSource > fPageSource
RNTupleProcessor(RNTupleProcessor &&)=delete
virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber)=0
Load the entry identified by the provided entry number.
std::unordered_map< std::string, RFieldContext > fFieldContexts
Maps the (qualified) field name to its corresponding field context.
const ROOT::RNTupleModel & GetModel() const
Get the model used by the processor.
ROOT::NTupleSize_t GetCurrentEntryNumber() const
Get the entry number that is currently being processed.
virtual void SetEntryPointers(const ROOT::REntry &entry)=0
Point the entry's field values of the processor to the pointers from the provided entry.
std::size_t GetCurrentProcessorNumber() const
Get the number of the inner processor currently being read.
RNTupleProcessor(std::string_view processorName, std::unique_ptr< ROOT::RNTupleModel > model)
Create a new base RNTupleProcessor.
void ConnectField(RFieldContext &fieldContext, ROOT::Internal::RPageSource &pageSource, ROOT::REntry &entry)
Create and connect a concrete field to the current page source, based on its proto field.
ROOT::NTupleSize_t GetNEntriesProcessed() const
Get the total number of entries processed so far.
RNTupleProcessor(const RNTupleProcessor &)=delete
RNTupleProcessor & operator=(RNTupleProcessor &&)=delete
static std::unique_ptr< RNTupleProcessor > CreateChain(std::vector< RNTupleOpenSpec > ntuples, std::unique_ptr< ROOT::RNTupleModel > model=nullptr)
Create an RNTupleProcessor for a chain (i.e., a vertical combination) of RNTuples.
const ROOT::REntry & GetEntry() const
Get a reference to the entry used by the processor.
std::vector< RNTupleOpenSpec > fNTuples
RNTupleProcessor & operator=(const RNTupleProcessor &)=delete
Processor specialization for processing a single RNTuple.
void SetEntryPointers(const ROOT::REntry &entry) final
void Connect()
Connect the page source of the underlying RNTuple.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in th...
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
Abstract interface to read data from an ntuple.
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:54
A field translates read and write calls from/to underlying columns to/from tree values.
A field token identifies a (sub)field in an entry.
The RNTupleModel encapsulates the schema of an RNTuple.
Describe directory structure in memory.
Definition TDirectory.h:45
const Int_t n
Definition legend1.C:16
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.