Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RNTupleProcessor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleProcessor.hxx
2/// \author Florine de Geus <florine.de.geus@cern.ch>
3/// \date 2024-03-26
4/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
5/// is welcome!
6
7/*************************************************************************
8 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#ifndef ROOT_RNTupleProcessor
16#define ROOT_RNTupleProcessor
17
18#include <ROOT/REntry.hxx>
19#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleModel.hxx>
23#include <ROOT/RNTupleTypes.hxx>
25#include <ROOT/RPageStorage.hxx>
26
27#include <memory>
28#include <string>
29#include <string_view>
30#include <vector>
31
32namespace ROOT {
33namespace Experimental {
34
35namespace Internal {
36struct RNTupleProcessorEntryLoader;
37} // namespace Internal
38
39// clang-format off
40/**
41\class ROOT::Experimental::RNTupleOpenSpec
42\ingroup NTuple
43\brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
44
45An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the
46TDirectory (or any of its subclasses) that contains the RNTuple.
47
48Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python.
49*/
50// clang-format on
52 friend class RNTupleProcessor;
55
56private:
57 std::string fNTupleName;
58 std::variant<std::string, TDirectory *> fStorage;
59
60public:
61 RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {}
62 RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {}
63
64 std::unique_ptr<ROOT::Internal::RPageSource> CreatePageSource() const;
65};
66
67// clang-format off
68/**
69\class ROOT::Experimental::RNTupleProcessorOptionalPtr<T>
70\ingroup NTuple
71\brief The RNTupleProcessorOptionalPtr provides access to values from fields present in an RNTupleProcessor, with support
72and checks for missing values.
73*/
74// clang-format on
75template <typename T>
77 friend class RNTupleProcessor;
78
79private:
82
88
89public:
90 /////////////////////////////////////////////////////////////////////////////
91 /// \brief Check if the pointer currently holds a valid value.
92 bool HasValue() const { return fProcessorEntry->IsValidField(fFieldIndex); }
93
94 /////////////////////////////////////////////////////////////////////////////
95 /// \brief Retrieve a shared pointer to the field value managed by the processor's entry.
96 ///
97 /// \return A `std::shared_ptr<T>` when the field is valid in the current entry, a `nullptr` otherwise.
98 std::shared_ptr<T> GetPtr() const
99 {
100 if (!fProcessorEntry->IsValidField(fFieldIndex))
101 return nullptr;
102
103 return fProcessorEntry->GetPtr<T>(fFieldIndex);
104 }
105
106 /////////////////////////////////////////////////////////////////////////////
107 /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
108 ///
109 /// \return A `T*` if the field is valid in the current entry, or a `nullptr` otherwise.
110 T *GetRawPtr() const { return GetPtr().get(); }
111
112 /////////////////////////////////////////////////////////////////////////////
113 /// \brief Bind the value to `valuePtr`.
114 ///
115 /// \param[in] valuePtr Pointer to bind the value to.
116 ///
117 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
118 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
119 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
120 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
121 /// its data through this interface, to ensure that only valid data can be read.
122 void BindRawPtr(T *valuePtr) { fProcessorEntry->BindRawPtr(fFieldIndex, valuePtr); }
123
124 /////////////////////////////////////////////////////////////////////////////
125 /// \brief Dereference the field value managed by the processor's entry.
126 ///
127 /// An RException is thrown when GetPtr() yields no value, i.e. the field is invalid in the processor's current entry.
128 const T &operator*() const
129 {
130 const auto valuePtr = GetPtr();
131 if (valuePtr)
132 return *valuePtr;
133 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
134 "\" because it has no value for the current entry"));
135 }
136
137 /////////////////////////////////////////////////////////////////////////////
138 /// \brief Member access to the field value managed by the processor's entry.
139 ///
140 /// An RException is thrown when GetPtr() yields no value, i.e. the field is invalid in the processor's current entry.
141 const T *operator->() const
142 {
143 const auto valuePtr = GetPtr();
144 if (valuePtr)
145 return valuePtr.get();
146 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
147 "\" because it has no value for the current entry"));
148 }
149};
150
151// clang-format off
152/**
153\class ROOT::Experimental::RNTupleProcessorOptionalPtr<void>
154\ingroup NTuple
155\brief Specialization of RNTupleProcessorOptionalPtr<T> for `void`-type pointers.
156*/
157// clang-format on
158template <>
160 friend class RNTupleProcessor;
161
162private:
165
171
172public:
173 /////////////////////////////////////////////////////////////////////////////
174 /// \brief Check if the pointer currently holds a valid value.
175 bool HasValue() const { return fProcessorEntry->IsValidField(fFieldIndex); }
176
177 /////////////////////////////////////////////////////////////////////////////
178 /// \brief Retrieve the type-erased pointer to the field value managed by the processor's entry.
179 ///
180 /// \return A `std::shared_ptr<void>` when the field is valid in the current entry, a `nullptr` otherwise.
181 std::shared_ptr<void> GetPtr() const
182 {
183 if (!fProcessorEntry->IsValidField(fFieldIndex))
184 return nullptr;
185
186 return fProcessorEntry->GetPtr<void>(fFieldIndex);
187 }
188
189 /////////////////////////////////////////////////////////////////////////////
190 /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
191 ///
192 /// \return A `void*` if the field is valid in the current entry, or a `nullptr` otherwise.
193 void *GetRawPtr() const { return GetPtr().get(); }
194
195 /////////////////////////////////////////////////////////////////////////////
196 /// \brief Bind the value to `valuePtr`.
197 ///
198 /// \param[in] valuePtr Pointer to bind the value to.
199 ///
200 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
201 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
202 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
203 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
204 /// its data through this interface, to ensure that only valid data can be read.
205 void BindRawPtr(void *valuePtr) { fProcessorEntry->BindRawPtr(fFieldIndex, valuePtr); }
206};
207
208// clang-format off
209/**
210\class ROOT::Experimental::RNTupleProcessor
211\ingroup NTuple
212\brief Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combined RNTuples.
213
214Example usage (see ntpl012_processor_chain.C and ntpl015_processor_join.C for bigger examples):
215
216~~~{.cpp}
217#include <ROOT/RNTupleProcessor.hxx>
218using ROOT::Experimental::RNTupleProcessor;
219using ROOT::Experimental::RNTupleOpenSpec;
220
221std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
222auto processor = RNTupleProcessor::CreateChain(ntuples);
223
224auto pt = processor->RequestField<float>("pt");
225
226for (const auto idx : *processor) {
227 std::cout << "event = " << idx << ", pt = " << *pt << std::endl;
228}
229~~~
230
231An RNTupleProcessor is created either:
2321. By providing one or more RNTupleOpenSpecs, each of which contains the name and storage location of a single RNTuple;
2332. By providing a previously created RNTupleProcessor.
234
235The RNTupleProcessor provides an iterator which gives access to the index of the current *global* entry of the
236processor, i.e. taking into account previously processed RNTuples.
237
238Because the schemas of each RNTuple that are part of an RNTupleProcessor may not necessarily be identical, or because
239it can occur that entries are only partially complete in a join-based processor, field values may be marked as
240"invalid", at which point their data should not be read. This is handled by the RNTupleProcessorOptionalPtr
241that is returned by RequestField().
242*/
243// clang-format on
249
250protected:
251 std::string fProcessorName;
252 std::unique_ptr<ROOT::RNTupleModel> fProtoModel = nullptr;
253 std::shared_ptr<Internal::RNTupleProcessorEntry> fEntry = nullptr;
254 std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fFieldIdxs;
255
256 /// Total number of entries. Only to be used internally by the processor, not meant to be exposed in the public
257 /// interface.
259
260 ROOT::NTupleSize_t fNEntriesProcessed = 0; ///< Total number of entries processed so far
261 ROOT::NTupleSize_t fCurrentEntryNumber = 0; ///< Current processor entry number
262 std::size_t fCurrentProcessorNumber = 0; ///< Number of the currently open inner processor
263
264 /////////////////////////////////////////////////////////////////////////////
265 /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
266 /// an existing one.
267 virtual void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry) = 0;
268
269 /////////////////////////////////////////////////////////////////////////////
270 /// \brief Check if the processor already has been initialized.
271 bool IsInitialized() const { return fProtoModel && fEntry; }
272
273 /////////////////////////////////////////////////////////////////////////////
274 /// \brief Connect fields to the page source of the processor's underlying RNTuple(s).
275 ///
276 /// \param[in] fieldIdxs Indices of the fields to connect.
277 /// \param[in] provenance Provenance of the processor.
278 /// \param[in] updateFields Whether the fields in the entry need to be updated, because the current underlying
279 /// RNTuple source changed.
280 virtual void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
281 const Internal::RNTupleProcessorProvenance &provenance, bool updateFields) = 0;
282
283 /////////////////////////////////////////////////////////////////////////////
284 /// \brief Load the entry identified by the provided entry number.
285 ///
286 /// \param[in] entryNumber Entry number to load
287 ///
288 /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise.
290
291 /////////////////////////////////////////////////////////////////////////////
292 /// \brief Get the proto model used by the processor.
293 ///
294 /// A processor's proto model contains all fields that can be accessed and is inferred from the descriptors of the
295 /// underlying RNTuples. It is used in RequestField() to check that the requested field is actually valid.
297 {
298 assert(fProtoModel);
299 return *fProtoModel;
300 }
301
302 /////////////////////////////////////////////////////////////////////////////
303 /// \brief Get the total number of entries in this processor
305
306 /////////////////////////////////////////////////////////////////////////////
307 /// \brief Check if a field exists on-disk and can be read by the processor.
308 ///
309 /// \param[in] fieldName Name of the field to check.
310 virtual bool CanReadFieldFromDisk(std::string_view fieldName) = 0;
311
312 /////////////////////////////////////////////////////////////////////////////
313 /// \brief Add a field to the entry.
314 ///
315 ///
316 /// \param[in] fieldName Name of the field to add.
317 /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
318 /// created.
319 /// \param[in] provenance Provenance of the processor.
320 ///
321 /// \return The index of the newly added field in the entry.
322 ///
323 /// In case the field was already present in the entry, the index of the existing field is returned.
325 AddFieldToEntry(std::string_view fieldName, void *valuePtr,
326 const Internal::RNTupleProcessorProvenance &provenance) = 0;
327
328 /////////////////////////////////////////////////////////////////////////////
329 /// \brief Add the entry mappings for this processor to the provided join table.
330 ///
331 /// \param[in] joinTable the join table to map the entries to.
332 /// \param[in] entryOffset In case the entry mapping is added from a chain, the offset of the entry indexes to use
333 /// with respect to the processor's position in the chain.
334 virtual void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) = 0;
335
336 /////////////////////////////////////////////////////////////////////////////
337 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
338 ///
339 /// \param[in,out] output Output stream to print to.
340 virtual void PrintStructureImpl(std::ostream &output) const = 0;
341
342 /////////////////////////////////////////////////////////////////////////////
343 /// \brief Create a new base RNTupleProcessor.
344 ///
345 /// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for
346 /// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary
347 /// RNTuple for RNTupleJoinProcessor.
348 RNTupleProcessor(std::string_view processorName) : fProcessorName(processorName) {}
349
350public:
355 virtual ~RNTupleProcessor() = default;
356
357 /////////////////////////////////////////////////////////////////////////////
358 /// \brief Get the total number of entries processed so far.
360
361 /////////////////////////////////////////////////////////////////////////////
362 /// \brief Get the entry number that is currently being processed.
364
365 /////////////////////////////////////////////////////////////////////////////
366 /// \brief Get the number of the inner processor currently being read.
367 ///
368 /// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned.
370
371 /////////////////////////////////////////////////////////////////////////////
372 /// \brief Get the name of the processor.
373 ///
374 /// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying
375 /// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the
376 /// primary processor for RNTupleJoinProcessor.
377 const std::string &GetProcessorName() const { return fProcessorName; }
378
379 /////////////////////////////////////////////////////////////////////////////
380 /// \brief Request access to a field for reading during processing.
381 ///
382 /// \tparam T Type of the requested field.
383 ///
384 /// \param[in] fieldName Name of the requested field.
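385 /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be created.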
386 /// \return An RNTupleProcessorOptionalPtr, which provides access to the field's value.
387 ///
388 /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry during processing, for
389 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
390 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
391 /// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through
392 /// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read.
393 template <typename T>
394 RNTupleProcessorOptionalPtr<T> RequestField(std::string_view fieldName, void *valuePtr = nullptr)
395 {
397 // TODO handle alternative (compatible field types)
398 auto fieldIdx = AddFieldToEntry(fieldName, valuePtr, Internal::RNTupleProcessorProvenance()).Unwrap();
399 return RNTupleProcessorOptionalPtr<T>(fEntry.get(), fieldIdx);
400 }
401
402 /////////////////////////////////////////////////////////////////////////////
403 /// \brief Print a graphical representation of the processor composition.
404 ///
405 /// \param[in,out] output Stream to print to (default is `std::cout`).
406 ///
407 /// ### Example:
408 /// The structure of a processor representing a join between a single primary RNTuple and a chain of two auxiliary
409 /// RNTuples will be printed as follows:
410 /// ~~~
411 /// +-----------------------------+ +-----------------------------+
412 /// | ntuple | | ntuple_aux |
413 /// | ntuple.root | | ntuple_aux1.root |
414 /// +-----------------------------+ +-----------------------------+
415 /// +-----------------------------+
416 /// | ntuple_aux |
417 /// | ntuple_aux2.root |
418 /// +-----------------------------+
419 /// ~~~
420 void PrintStructure(std::ostream &output = std::cout) const { PrintStructureImpl(output); }
421
422 // clang-format off
423 /**
424 \class ROOT::Experimental::RNTupleProcessor::RIterator
425 \ingroup NTuple
426 \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof.
427 */
428 // clang-format on
429 class RIterator {
430 private:
433
434 public:
435 using iterator_category = std::input_iterator_tag;
438 using difference_type = std::ptrdiff_t;
441
443 : fProcessor(processor), fCurrentEntryNumber(entryNumber)
444 {
445 if (!fProcessor.fEntry) {
446 fCurrentEntryNumber = ROOT::kInvalidNTupleIndex;
447 }
448 // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already
449 // know there is nothing to load.
451 fProcessor.Connect(fProcessor.fEntry->GetFieldIndices(), Internal::RNTupleProcessorProvenance(),
452 /*updateFields=*/false);
454 }
455 }
456
458 {
460 return *this;
461 }
462
464 {
465 auto obj = *this;
466 ++(*this);
467 return obj;
468 }
469
471
472 friend bool operator!=(const iterator &lh, const iterator &rh)
473 {
475 }
476 friend bool operator==(const iterator &lh, const iterator &rh)
477 {
479 }
480 };
481
482 RIterator begin() { return RIterator(*this, 0); }
484
485 /////////////////////////////////////////////////////////////////////////////
486 /// \brief Create an RNTupleProcessor for a single RNTuple.
487 ///
488 /// \param[in] ntuple The name and storage location of the RNTuple to process.
489 /// \param[in] processorName The name to give to the processor. If empty, the name of the input RNTuple is used.
490 ///
491 /// \return A pointer to the newly created RNTupleProcessor.
492 static std::unique_ptr<RNTupleProcessor> Create(RNTupleOpenSpec ntuple, std::string_view processorName = "");
493
494 /////////////////////////////////////////////////////////////////////////////
495 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
496 ///
497 /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process.
498 /// \param[in] processorName The name to give to the processor. If empty, the name of the first RNTuple is used.
499 ///
500 /// \return A pointer to the newly created RNTupleProcessor.
501 static std::unique_ptr<RNTupleProcessor>
502 CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::string_view processorName = "");
503
504 /////////////////////////////////////////////////////////////////////////////
505 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors.
506 ///
507 /// \param[in] innerProcessors A list with the processors to chain.
508 /// \param[in] processorName The name to give to the processor. If empty, the name of the first inner processor is
509 /// used.
510 ///
511 /// \return A pointer to the newly created RNTupleProcessor.
512 static std::unique_ptr<RNTupleProcessor>
513 CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors, std::string_view processorName = "");
514
515 /////////////////////////////////////////////////////////////////////////////
516 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
517 ///
518 /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential
519 /// order.
520 /// \param[in] auxNTuple The name and location of the RNTuple to join the primary RNTuple with. The order in which
521 /// its entries are processed is determined by the primary RNTuple and doesn't necessarily have to be sequential.
522 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
523 /// The join is made based on the combined join field values, and therefore each field has to be present in each
524 /// specified RNTuple. If an empty list is provided, it is assumed that the specified RNTuples are fully aligned.
525 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary RNTuple is used.
526 ///
527 /// \return A pointer to the newly created RNTupleProcessor.
528 static std::unique_ptr<RNTupleProcessor> CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple,
529 const std::vector<std::string> &joinFields,
530 std::string_view processorName = "");
531
532 /////////////////////////////////////////////////////////////////////////////
533 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
534 ///
535 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
536 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
537 /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
538 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
539 /// The join is made based on the combined join field values, and therefore each field has to be present in each
540 /// specified processor. If an empty list is provided, it is assumed that the specified processors are fully
541 /// aligned.
542 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary processor is used.
543 ///
544 /// \return A pointer to the newly created RNTupleProcessor.
545 static std::unique_ptr<RNTupleProcessor>
546 CreateJoin(std::unique_ptr<RNTupleProcessor> primaryProcessor, std::unique_ptr<RNTupleProcessor> auxProcessor,
547 const std::vector<std::string> &joinFields, std::string_view processorName = "");
548};
549
550// clang-format off
551/**
552\class ROOT::Experimental::RNTupleSingleProcessor
553\ingroup NTuple
554\brief Processor specialization for processing a single RNTuple.
555*/
556// clang-format on
558 friend class RNTupleProcessor;
559
560private:
562 std::unique_ptr<ROOT::Internal::RPageSource> fPageSource;
563
564 /////////////////////////////////////////////////////////////////////////////
565 /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
566 /// an existing one.
567 ///
568 /// At this point, the page source for the underlying RNTuple of the processor will be created and opened.
569 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
570
571 /////////////////////////////////////////////////////////////////////////////
572 /// \brief Connect the provided fields indices in the entry to their on-disk fields.
573 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
575 bool updateFields = false) final;
576
577 /////////////////////////////////////////////////////////////////////////////
578 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
579 /// processor).
580 ///
581 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
582 ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
583
584 /////////////////////////////////////////////////////////////////////////////
585 /// \brief Get the total number of entries in this processor.
587 {
588 Initialize();
591 return fNEntries;
592 }
593
594 /////////////////////////////////////////////////////////////////////////////
595 /// \brief Check if a field exists on-disk and can be read by the processor.
596 ///
597 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
598 bool CanReadFieldFromDisk(std::string_view fieldName) final;
599
600 /////////////////////////////////////////////////////////////////////////////
601 /// \brief Add a field to the entry.
602 ///
603 /// \sa RNTupleProcessor::AddFieldToEntry()
605 std::string_view fieldName, void *valuePtr = nullptr,
607
608 /////////////////////////////////////////////////////////////////////////////
609 /// \brief Add the entry mappings for this processor to the provided join table.
610 ///
611 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
612 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
613
614 /////////////////////////////////////////////////////////////////////////////
615 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
616 ///
617 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
618 void PrintStructureImpl(std::ostream &output) const final;
619
620 /////////////////////////////////////////////////////////////////////////////
621 /// \brief Construct a new RNTupleProcessor for processing a single RNTuple.
622 ///
623 /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process.
624 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is
625 /// the name of the underlying RNTuple.
626 RNTupleSingleProcessor(RNTupleOpenSpec ntuple, std::string_view processorName);
627
628public:
631 RNTupleSingleProcessor &operator=(const RNTupleSingleProcessor &) = delete;
634 {
635 // The proto model must be deleted before fPageSource: reset() destroys it here, whereas release() would leak it.
636 fProtoModel.reset();
637 };
638};
639
640// clang-format off
641/**
642\class ROOT::Experimental::RNTupleChainProcessor
643\ingroup NTuple
644\brief Processor specialization for vertically combined (*chained*) RNTupleProcessors.
645*/
646// clang-format on
648 friend class RNTupleProcessor;
649
650private:
651 std::vector<std::unique_ptr<RNTupleProcessor>> fInnerProcessors;
652 std::vector<ROOT::NTupleSize_t> fInnerNEntries;
653
655
656 /////////////////////////////////////////////////////////////////////////////
657 /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
658 /// an existing one.
659 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
660
661 /////////////////////////////////////////////////////////////////////////////
662 /// \brief Connect the provided fields indices in the entry to their on-disk fields.
663 ///
664 /// \sa RNTupleProcessor::Connect()
665 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
667 bool updateFields = false) final;
668
669 /////////////////////////////////////////////////////////////////////////////
670 /// \brief Update the entry to reflect any missing fields in the current inner processor.
671 void ConnectInnerProcessor(std::size_t processorNumber);
672
673 /////////////////////////////////////////////////////////////////////////////
674 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
675 /// processor).
676 ///
677 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
678 ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
679
680 /////////////////////////////////////////////////////////////////////////////
681 /// \brief Get the total number of entries in this processor.
682 ///
683 /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly!
685
686 /////////////////////////////////////////////////////////////////////////////
687 /// \brief Check if a field exists on-disk and can be read by the processor.
688 ///
689 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
690 bool CanReadFieldFromDisk(std::string_view fieldName) final
691 {
692 return fInnerProcessors[fCurrentProcessorNumber]->CanReadFieldFromDisk(fieldName);
693 }
694
695 /////////////////////////////////////////////////////////////////////////////
696 /// \brief Add a field to the entry.
697 ///
698 /// \sa RNTupleProcessor::AddFieldToEntry()
700 std::string_view fieldName, void *valuePtr = nullptr,
702
703 /////////////////////////////////////////////////////////////////////////////
704 /// \brief Add the entry mappings for this processor to the provided join table.
705 ///
706 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
707 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
708
709 /////////////////////////////////////////////////////////////////////////////
710 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
711 ///
712 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
713 void PrintStructureImpl(std::ostream &output) const final;
714
715 /////////////////////////////////////////////////////////////////////////////
716 /// \brief Construct a new RNTupleChainProcessor.
717 ///
718 /// \param[in] processors The inner processors to chain.
719 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this
720 /// is the name of the first inner processor.
721 ///
722 /// The inner processors are processed in the order in which they are specified.
723 RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName);
724
725public:
728 RNTupleChainProcessor &operator=(const RNTupleChainProcessor &) = delete;
730 ~RNTupleChainProcessor() override = default;
731};
732
733// clang-format off
734/**
735\class ROOT::Experimental::RNTupleJoinProcessor
736\ingroup NTuple
737\brief Processor specialization for horizontally combined (*joined*) RNTupleProcessors.
738*/
739// clang-format on
741 friend class RNTupleProcessor;
742
743private:
744 std::unique_ptr<RNTupleProcessor> fPrimaryProcessor;
745 std::unique_ptr<RNTupleProcessor> fAuxiliaryProcessor;
746
747 std::vector<std::string> fJoinFieldNames;
748 std::set<Internal::RNTupleProcessorEntry::FieldIndex_t> fJoinFieldIdxs;
749
750 std::unique_ptr<Internal::RNTupleJoinTable> fJoinTable;
751 bool fJoinTableIsBuilt = false;
752
753 std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fAuxiliaryFieldIdxs;
754
755 /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
756 /// an existing one.
757 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
758
759 /////////////////////////////////////////////////////////////////////////////
760 /// \brief Connect the provided field indices in the entry to their on-disk fields.
761 ///
762 /// \sa RNTupleProcessor::Connect()
763 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
765 bool updateFields = false) final;
766
767 /////////////////////////////////////////////////////////////////////////////
768 /// \brief Load the entry identified by the provided entry number of the primary processor.
769 ///
770 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
771 ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
772
773 /////////////////////////////////////////////////////////////////////////////
774 /// \brief Get the total number of entries in this processor.
776
777 /////////////////////////////////////////////////////////////////////////////
778 /// \brief Set the processor's proto model by combining the primary and auxiliary models.
779 ///
780 /// \param[in] primaryModel The proto model of the primary processor.
781 /// \param[in] auxModel The proto model of the auxiliary processor.
782 ///
783 /// To prevent field name clashes when the primary and auxiliary models contain fields with the same name, fields
784 /// from the auxiliary model are stored as an anonymous record, and subsequently registered as subfields in the join
785 /// model. This way, they can be accessed from the processor's entry as `auxNTupleName.fieldName`.
786 void SetProtoModel(std::unique_ptr<ROOT::RNTupleModel> primaryModel, std::unique_ptr<ROOT::RNTupleModel> auxModel);
787
788 /////////////////////////////////////////////////////////////////////////////
789 /// \brief Set the validity for all fields in the auxiliary processor at once.
790 void SetAuxiliaryFieldValidity(bool validity);
791
792 /////////////////////////////////////////////////////////////////////////////
793 /// \brief Check if a field exists on-disk and can be read by the processor.
794 /// \note The primary processor is tried first; a leading `<auxiliary name>.` is dropped before trying the auxiliary.
795 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
796 bool CanReadFieldFromDisk(std::string_view fieldName) final
797 {
798 if (fPrimaryProcessor->CanReadFieldFromDisk(fieldName))
799 return true;
800 // Strip the qualifier only if it is exactly "<auxiliary processor name>."; a bare prefix match is not enough.
801 const std::string auxPrefix = fAuxiliaryProcessor->GetProcessorName() + ".";
802 if (fieldName.substr(0, auxPrefix.size()) == auxPrefix)
803 fieldName.remove_prefix(auxPrefix.size());
804 return fAuxiliaryProcessor->CanReadFieldFromDisk(fieldName);
805 }
806
807 /////////////////////////////////////////////////////////////////////////////
808 /// \brief Add a field to the entry.
809 ///
810 /// \sa RNTupleProcessor::AddFieldToEntry()
812 std::string_view fieldName, void *valuePtr = nullptr,
814
815 /////////////////////////////////////////////////////////////////////////////
816 /// \brief Add the entry mappings for this processor to the provided join table.
817 ///
818 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
819 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
820
821 /////////////////////////////////////////////////////////////////////////////
822 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
823 ///
824 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
825 void PrintStructureImpl(std::ostream &output) const final;
826
827 /////////////////////////////////////////////////////////////////////////////
828 /// \brief Construct a new RNTupleJoinProcessor.
829 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
830 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
831 /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
832 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
833 /// The join is made based on the combined join field values, and therefore each field has to be present in each
834 /// specified processor. If an empty list is provided, it is assumed that the processors are fully aligned.
835 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this
836 /// is the name of the primary processor.
837 RNTupleJoinProcessor(std::unique_ptr<RNTupleProcessor> primaryProcessor,
838 std::unique_ptr<RNTupleProcessor> auxProcessor, const std::vector<std::string> &joinFields,
839 std::string_view processorName);
840
841public:
843 RNTupleJoinProcessor &operator=(const RNTupleJoinProcessor &) = delete; // not copy-assignable
846 ~RNTupleJoinProcessor() override = default;
847};
848
849} // namespace Experimental
850} // namespace ROOT
851
852#endif // ROOT_RNTupleProcessor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples.
Collection of values in an RNTupleProcessor, analogous to REntry, with checks and support for missing...
Processor specialization for vertically combined (chained) RNTupleProcessors.
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
void PrintStructureImpl(std::ostream &output) const final
Processor-specific implementation for printing its structure, called by PrintStructure().
void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0) final
Add the entry mappings for this processor to the provided join table.
RNTupleChainProcessor(std::vector< std::unique_ptr< RNTupleProcessor > > processors, std::string_view processorName)
Construct a new RNTupleChainProcessor.
void ConnectInnerProcessor(std::size_t processorNumber)
Update the entry to reflect any missing fields in the current inner processor.
Internal::RNTupleProcessorProvenance fProvenance
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry=nullptr) final
Initialize the processor, by setting fProtoModel and creating an (initially empty) fEntry,...
std::vector< ROOT::NTupleSize_t > fInnerNEntries
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided field indices in the entry to their on-disk fields.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in th...
ROOT::RResult< Internal::RNTupleProcessorEntry::FieldIndex_t > AddFieldToEntry(std::string_view fieldName, void *valuePtr=nullptr, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance()) final
Add a field to the entry.
std::vector< std::unique_ptr< RNTupleProcessor > > fInnerProcessors
Processor specialization for horizontally combined (joined) RNTupleProcessors.
std::set< Internal::RNTupleProcessorEntry::FieldIndex_t > fJoinFieldIdxs
std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > fAuxiliaryFieldIdxs
RNTupleJoinProcessor(std::unique_ptr< RNTupleProcessor > primaryProcessor, std::unique_ptr< RNTupleProcessor > auxProcessor, const std::vector< std::string > &joinFields, std::string_view processorName)
Construct a new RNTupleJoinProcessor.
void SetProtoModel(std::unique_ptr< ROOT::RNTupleModel > primaryModel, std::unique_ptr< ROOT::RNTupleModel > auxModel)
Set the processor's proto model by combining the primary and auxiliary models.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided entry number of the primary processor.
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
void SetAuxiliaryFieldValidity(bool validity)
Set the validity for all fields in the auxiliary processor at once.
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided field indices in the entry to their on-disk fields.
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
std::unique_ptr< RNTupleProcessor > fPrimaryProcessor
std::unique_ptr< Internal::RNTupleJoinTable > fJoinTable
std::unique_ptr< RNTupleProcessor > fAuxiliaryProcessor
Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
RNTupleOpenSpec(std::string_view n, const std::string &s)
std::variant< std::string, TDirectory * > fStorage
RNTupleOpenSpec(std::string_view n, TDirectory *s)
std::unique_ptr< ROOT::Internal::RPageSource > CreatePageSource() const
RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
void BindRawPtr(void *valuePtr)
Bind the value to valuePtr.
void * GetRawPtr() const
Get a non-owning pointer to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex
std::shared_ptr< void > GetPtr() const
Get the pointer to the field value managed by the processor's entry.
bool HasValue() const
Check if the pointer currently holds a valid value.
std::shared_ptr< T > GetPtr() const
Get a shared pointer to the field value managed by the processor's entry.
const T & operator*() const
Get a reference to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex
const T * operator->() const
Access the field value managed by the processor's entry.
void BindRawPtr(T *valuePtr)
Bind the value to valuePtr.
bool HasValue() const
Check if the pointer currently holds a valid value.
T * GetRawPtr() const
Get a non-owning pointer to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry * fProcessorEntry
RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
Identifies how a processor is composed.
Iterator over the entries of an RNTuple, or vertical concatenation thereof.
friend bool operator==(const iterator &lh, const iterator &rh)
friend bool operator!=(const iterator &lh, const iterator &rh)
RIterator(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber)
Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combine...
virtual bool CanReadFieldFromDisk(std::string_view fieldName)=0
Check if a field exists on-disk and can be read by the processor.
const std::string & GetProcessorName() const
Get the name of the processor.
virtual ROOT::NTupleSize_t GetNEntries()=0
Get the total number of entries in this processor.
static std::unique_ptr< RNTupleProcessor > CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple, const std::vector< std::string > &joinFields, std::string_view processorName="")
Create an RNTupleProcessor for a join (i.e., a horizontal combination) of RNTuples.
ROOT::NTupleSize_t fNEntries
Total number of entries.
friend struct ROOT::Experimental::Internal::RNTupleProcessorEntryLoader
static std::unique_ptr< RNTupleProcessor > CreateChain(std::vector< RNTupleOpenSpec > ntuples, std::string_view processorName="")
Create an RNTupleProcessor for a chain (i.e., a vertical combination) of RNTuples.
RNTupleProcessor(RNTupleProcessor &&)=delete
std::shared_ptr< Internal::RNTupleProcessorEntry > fEntry
virtual void PrintStructureImpl(std::ostream &output) const =0
Processor-specific implementation for printing its structure, called by PrintStructure().
virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber)=0
Load the entry identified by the provided entry number.
ROOT::NTupleSize_t GetCurrentEntryNumber() const
Get the entry number that is currently being processed.
virtual ROOT::RResult< Internal::RNTupleProcessorEntry::FieldIndex_t > AddFieldToEntry(std::string_view fieldName, void *valuePtr, const Internal::RNTupleProcessorProvenance &provenance)=0
Add a field to the entry.
virtual void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance, bool updateFields)=0
Connect fields to the page source of the processor's underlying RNTuple(s).
const ROOT::RNTupleModel & GetProtoModel() const
Get the proto model used by the processor.
std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > fFieldIdxs
RNTupleProcessorOptionalPtr< T > RequestField(std::string_view fieldName, void *valuePtr=nullptr)
Request access to a field for reading during processing.
virtual void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry)=0
Initialize the processor, by setting fProtoModel and creating an (initially empty) fEntry,...
bool IsInitialized() const
Check if the processor already has been initialized.
virtual void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0)=0
Add the entry mappings for this processor to the provided join table.
std::size_t GetCurrentProcessorNumber() const
Get the number of the inner processor currently being read.
void PrintStructure(std::ostream &output=std::cout)
Print a graphical representation of the processor composition.
ROOT::NTupleSize_t GetNEntriesProcessed() const
Get the total number of entries processed so far.
RNTupleProcessor(const RNTupleProcessor &)=delete
RNTupleProcessor & operator=(RNTupleProcessor &&)=delete
RNTupleProcessor(std::string_view processorName)
Create a new base RNTupleProcessor.
std::unique_ptr< ROOT::RNTupleModel > fProtoModel
static std::unique_ptr< RNTupleProcessor > Create(RNTupleOpenSpec ntuple, std::string_view processorName="")
Create an RNTupleProcessor for a single RNTuple.
RNTupleProcessor & operator=(const RNTupleProcessor &)=delete
Processor specialization for processing a single RNTuple.
void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0) final
Add the entry mappings for this processor to the provided join table.
ROOT::RResult< Internal::RNTupleProcessorEntry::FieldIndex_t > AddFieldToEntry(std::string_view fieldName, void *valuePtr=nullptr, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance()) final
Add a field to the entry.
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided field indices in the entry to their on-disk fields.
void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry=nullptr) final
Initialize the processor, by setting fProtoModel and creating an (initially empty) fEntry,...
void PrintStructureImpl(std::ostream &output) const final
Processor-specific implementation for printing its structure, called by PrintStructure().
RNTupleSingleProcessor(RNTupleOpenSpec ntuple, std::string_view processorName)
Construct a new RNTupleProcessor for processing a single RNTuple.
std::unique_ptr< ROOT::Internal::RPageSource > fPageSource
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in th...
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
The RNTupleModel encapsulates the schema of an RNTuple.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
Describe directory structure in memory.
Definition TDirectory.h:45
STL class.
STL class.
STL class.
STL class.
const Int_t n
Definition legend1.C:16
Namespace for ROOT features in testing.
Definition TROOT.h:100
if(pos!=-1) leafTypeName.Remove(pos)
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.