Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleProcessor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleProcessor.hxx
2/// \author Florine de Geus <florine.de.geus@cern.ch>
3/// \date 2024-03-26
4/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
5/// is welcome!
6
7/*************************************************************************
8 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#ifndef ROOT_RNTupleProcessor
16#define ROOT_RNTupleProcessor
17
18#include <ROOT/REntry.hxx>
19#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleModel.hxx>
23#include <ROOT/RNTupleTypes.hxx>
25#include <ROOT/RPageStorage.hxx>
26
27#include <memory>
28#include <string>
29#include <string_view>
30#include <vector>
31
32namespace ROOT {
33namespace Experimental {
34
35namespace Internal {
36struct RNTupleProcessorEntryLoader;
37} // namespace Internal
38
39// clang-format off
40/**
41\class ROOT::Experimental::RNTupleOpenSpec
42\ingroup NTuple
43\brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
44
45An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the
46TDirectory (or any of its subclasses) that contains the RNTuple.
47
48Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python.
49*/
50// clang-format on
52 friend class RNTupleProcessor;
55
56private:
57 std::string fNTupleName;
58 std::variant<std::string, TDirectory *> fStorage;
59
60public:
/// \brief Create a specification for the RNTuple named `n` that is stored in the TDirectory pointed to by `s`.
61 RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {}
/// \brief Create a specification for the RNTuple named `n`, with the string `s` identifying its storage location
/// (according to the class description, the path to the ROOT file).
62 RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {}
63
/// \brief Create a page source from this specification.
/// NOTE(review): presumably for the RNTuple `fNTupleName` in `fStorage`; the implementation is not visible here.
64 std::unique_ptr<ROOT::Internal::RPageSource> CreatePageSource() const;
65};
66
67// clang-format off
68/**
69\class ROOT::Experimental::RNTupleProcessorOptionalPtr<T>
70\ingroup NTuple
71\brief The RNTupleProcessorOptionalPtr provides access to values from fields present in an RNTupleProcessor, with support
72and checks for missing values.
73*/
74// clang-format on
75template <typename T>
77 friend class RNTupleProcessor;
78
79private:
82
88
89public:
90 /////////////////////////////////////////////////////////////////////////////
91 /// \brief Check if the pointer currently holds a valid value.
93
94 /////////////////////////////////////////////////////////////////////////////
95 /// \brief Get a shared pointer to the field value managed by the processor's entry.
96 ///
97 /// \return A `std::shared_ptr<T>` if the field is valid in the current entry, or a `nullptr` otherwise.
98 std::shared_ptr<T> GetPtr() const
99 {
102 return value.template GetPtr<T>();
103 }
104
105 return nullptr;
106 }
107
108 /////////////////////////////////////////////////////////////////////////////
109 /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
110 ///
111 /// \return A `T*` if the field is valid in the current entry, or a `nullptr` otherwise.
112 T *GetRawPtr() const { return GetPtr().get(); }
113
114 /////////////////////////////////////////////////////////////////////////////
115 /// \brief Bind the value to `valuePtr`.
116 ///
117 /// \param[in] valuePtr Pointer to bind the value to.
118 ///
119 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
120 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
121 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
122 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
123 /// its data through this interface, to ensure that only valid data can be read.
125
126 /////////////////////////////////////////////////////////////////////////////
127 /// \brief Get a reference to the field value managed by the processor's entry.
128 ///
129 /// Throws an exception if the field is invalid in the processor's current entry.
130 const T &operator*() const
131 {
132 if (auto ptr = GetPtr())
133 return *ptr;
134 else
135 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
136 "\" because it has no value for the current entry"));
137 }
138
139 /////////////////////////////////////////////////////////////////////////////
140 /// \brief Access the field value managed by the processor's entry.
141 ///
142 /// Throws an exception if the field is invalid in the processor's current entry.
143 const T *operator->() const
144 {
145 if (auto ptr = GetPtr())
146 return ptr.get();
147 else
148 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
149 "\" because it has no value for the current entry"));
150 }
151};
152
153// clang-format off
154/**
155\class ROOT::Experimental::RNTupleProcessorOptionalPtr<void>
156\ingroup NTuple
157\brief Specialization of RNTupleProcessorOptionalPtr<T> for `void`-type pointers.
158*/
159// clang-format on
160template <>
162 friend class RNTupleProcessor;
163
164private:
167
173
174public:
175 /////////////////////////////////////////////////////////////////////////////
176 /// \brief Check if the pointer currently holds a valid value.
178
179 /////////////////////////////////////////////////////////////////////////////
180 /// \brief Get the pointer to the field value managed by the processor's entry.
181 ///
182 /// \return A `std::shared_ptr<void>` if the field is valid in the current entry, or a `nullptr` otherwise.
183 std::shared_ptr<void> GetPtr() const
184 {
187 return value.template GetPtr<void>();
188 }
189
190 return nullptr;
191 }
192
193 /////////////////////////////////////////////////////////////////////////////
194 /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
195 ///
196 /// \return A `void*` if the field is valid in the current entry, or a `nullptr` otherwise.
197 void *GetRawPtr() const { return GetPtr().get(); }
198
199 /////////////////////////////////////////////////////////////////////////////
200 /// \brief Bind the value to `valuePtr`.
201 ///
202 /// \param[in] valuePtr Pointer to bind the value to.
203 ///
204 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
205 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
206 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
207 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
208 /// its data through this interface, to ensure that only valid data can be read.
210};
211
212// clang-format off
213/**
214\class ROOT::Experimental::RNTupleProcessor
215\ingroup NTuple
216\brief Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combined RNTuples.
217
218Example usage (see ntpl012_processor_chain.C and ntpl015_processor_join.C for bigger examples):
219
220~~~{.cpp}
221#include <ROOT/RNTupleProcessor.hxx>
222using ROOT::Experimental::RNTupleProcessor;
223using ROOT::Experimental::RNTupleOpenSpec;
224
225std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
226auto processor = RNTupleProcessor::CreateChain(ntuples);
227
228auto pt = processor->RequestField<float>("pt");
229
230for (const auto idx : *processor) {
231 std::cout << "event = " << idx << ", pt = " << *pt << std::endl;
232}
233~~~
234
235An RNTupleProcessor is created either:
2361. By providing one or more RNTupleOpenSpecs, each of which contains the name and storage location of a single RNTuple;
2372. By providing a previously created RNTupleProcessor.
238
239The RNTupleProcessor provides an iterator which gives access to the index of the current *global* entry of the
240processor, i.e. taking into account previously processed RNTuples.
241
242Because the schemas of each RNTuple that are part of an RNTupleProcessor may not necessarily be identical, or because
243it can occur that entries are only partially complete in a join-based processor, field values may be marked as
244"invalid", at which point their data should not be read. This is handled by the RNTupleProcessorOptionalPtr
245that is returned by RequestField().
246*/
247// clang-format on
253
254protected:
255 std::string fProcessorName;
256 std::shared_ptr<Internal::RNTupleProcessorEntry> fEntry = nullptr;
257 std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fFieldIdxs;
258
259 /// Total number of entries. Only to be used internally by the processor, not meant to be exposed in the public
260 /// interface.
262
263 ROOT::NTupleSize_t fNEntriesProcessed = 0; ///< Total number of entries processed so far
264 ROOT::NTupleSize_t fCurrentEntryNumber = 0; ///< Current processor entry number
265 std::size_t fCurrentProcessorNumber = 0; ///< Number of the currently open inner processor
266
267 /////////////////////////////////////////////////////////////////////////////
268 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
269 virtual void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry) = 0;
270
271 /////////////////////////////////////////////////////////////////////////////
272 /// \brief Check if the processor already has been initialized.
273 bool IsInitialized() const { return fEntry != nullptr; }
274
275 /////////////////////////////////////////////////////////////////////////////
276 /// \brief Connect fields to the page source of the processor's underlying RNTuple(s).
277 ///
278 /// \param[in] fieldIdxs Indices of the fields to connect.
279 /// \param[in] provenance Provenance of the processor.
280 /// \param[in] updateFields Whether the fields in the entry need to be updated, because the current underlying
281 /// RNTuple source changed.
282 virtual void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
284
285 /////////////////////////////////////////////////////////////////////////////
286 /// \brief Load the entry identified by the provided entry number.
287 ///
288 /// \param[in] entryNumber Entry number to load
289 ///
290 /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise.
292
293 /////////////////////////////////////////////////////////////////////////////
294 /// \brief Get the total number of entries in this processor
296
297 /////////////////////////////////////////////////////////////////////////////
298 /// \brief Check if a field exists on-disk and can be read by the processor.
299 ///
300 /// \param[in] fieldName Name of the field to check.
301 virtual bool CanReadFieldFromDisk(std::string_view fieldName) = 0;
302
303 /////////////////////////////////////////////////////////////////////////////
304 /// \brief Add a field to the entry.
305 ///
306 ///
307 /// \param[in] fieldName Name of the field to add.
308 /// \param[in] typeName Type of the field to add.
309 /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
310 /// created.
311 /// \param[in] provenance Provenance of the processor.
312 ///
313 /// \return The index of the newly added field in the entry.
314 ///
315 /// In case the field was already present in the entry, the index of the existing field is returned.
317 AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr,
319
320 /////////////////////////////////////////////////////////////////////////////
321 /// \brief Add the entry mappings for this processor to the provided join table.
322 ///
323 /// \param[in] joinTable the join table to map the entries to.
324 /// \param[in] entryOffset In case the entry mapping is added from a chain, the offset of the entry indexes to use
325 /// with respect to the processor's position in the chain.
327
328 /////////////////////////////////////////////////////////////////////////////
329 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
330 ///
331 /// \param[in,out] output Output stream to print to.
332 virtual void PrintStructureImpl(std::ostream &output) const = 0;
333
334 /////////////////////////////////////////////////////////////////////////////
335 /// \brief Create a new base RNTupleProcessor.
336 ///
337 /// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for
338 /// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary
339 /// RNTuple for RNTupleJoinProcessor.
341
342public:
347 virtual ~RNTupleProcessor() = default;
348
349 /////////////////////////////////////////////////////////////////////////////
350 /// \brief Get the total number of entries processed so far.
352
353 /////////////////////////////////////////////////////////////////////////////
354 /// \brief Get the entry number that is currently being processed.
356
357 /////////////////////////////////////////////////////////////////////////////
358 /// \brief Get the number of the inner processor currently being read.
359 ///
360 /// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned.
362
363 /////////////////////////////////////////////////////////////////////////////
364 /// \brief Get the name of the processor.
365 ///
366 /// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying
367 /// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the
368 /// primary processor for RNTupleJoinProcessor.
369 const std::string &GetProcessorName() const { return fProcessorName; }
370
371 /////////////////////////////////////////////////////////////////////////////
372 /// \brief Request access to a field for reading during processing.
373 ///
374 /// \tparam T Type of the requested field.
375 ///
376 /// \param[in] fieldName Name of the requested field.
377 /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
378 /// created.
379 ///
380 /// \return An RNTupleProcessorOptionalPtr, which provides access to the field's value.
381 ///
382 /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry during processing, for
383 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
384 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
385 /// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through
386 /// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read.
387 template <typename T>
389 {
391 std::string typeName{};
392 if constexpr (!std::is_void_v<T>) {
393 typeName = ROOT::Internal::GetRenormalizedTypeName(typeid(T));
394 }
397 }
398
399 /////////////////////////////////////////////////////////////////////////////
400 /// \brief Print a graphical representation of the processor composition.
401 ///
402 /// \param[in,out] output Stream to print to (default is stdout).
403 ///
404 /// ### Example:
405 /// The structure of a processor representing a join between a single primary RNTuple and a chain of two auxiliary
406 /// RNTuples will be printed as follows:
407 /// ~~~
408 /// +-----------------------------+ +-----------------------------+
409 /// | ntuple | | ntuple_aux |
410 /// | ntuple.root | | ntuple_aux1.root |
411 /// +-----------------------------+ +-----------------------------+
412 /// +-----------------------------+
413 /// | ntuple_aux |
414 /// | ntuple_aux2.root |
415 /// +-----------------------------+
416 /// ~~~
417 void PrintStructure(std::ostream &output = std::cout) { PrintStructureImpl(output); }
418
419 // clang-format off
420 /**
421 \class ROOT::Experimental::RNTupleProcessor::RIterator
422 \ingroup NTuple
423 \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof.
424 */
425 // clang-format on
426 class RIterator {
427 private:
430
431 public:
432 using iterator_category = std::input_iterator_tag;
435 using difference_type = std::ptrdiff_t;
438
441 {
442 if (!fProcessor.fEntry) {
444 }
445 // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already
446 // know there is nothing to load.
449 /*updateFields=*/false);
451 }
452 }
453
459
461 {
462 auto obj = *this;
463 ++(*this);
464 return obj;
465 }
466
468
469 friend bool operator!=(const iterator &lh, const iterator &rh)
470 {
471 return lh.fCurrentEntryNumber != rh.fCurrentEntryNumber;
472 }
473 friend bool operator==(const iterator &lh, const iterator &rh)
474 {
475 return lh.fCurrentEntryNumber == rh.fCurrentEntryNumber;
476 }
477 };
478
479 RIterator begin() { return RIterator(*this, 0); }
481
482 /////////////////////////////////////////////////////////////////////////////
483 /// \brief Create an RNTupleProcessor for a single RNTuple.
484 ///
485 /// \param[in] ntuple The name and storage location of the RNTuple to process.
486 /// \param[in] processorName The name to give to the processor. If empty, the name of the input RNTuple is used.
487 ///
488 /// \return A pointer to the newly created RNTupleProcessor.
489 static std::unique_ptr<RNTupleProcessor> Create(RNTupleOpenSpec ntuple, std::string_view processorName = "");
490
491 /////////////////////////////////////////////////////////////////////////////
492 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
493 ///
494 /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process.
495 /// \param[in] processorName The name to give to the processor. If empty, the name of the first RNTuple is used.
496 ///
497 /// \return A pointer to the newly created RNTupleProcessor.
498 static std::unique_ptr<RNTupleProcessor>
499 CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::string_view processorName = "");
500
501 /////////////////////////////////////////////////////////////////////////////
502 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors.
503 ///
504 /// \param[in] innerProcessors A list with the processors to chain.
505 /// \param[in] processorName The name to give to the processor. If empty, the name of the first inner processor is
506 /// used.
507 ///
508 /// \return A pointer to the newly created RNTupleProcessor.
509 static std::unique_ptr<RNTupleProcessor>
510 CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors, std::string_view processorName = "");
511
512 /////////////////////////////////////////////////////////////////////////////
513 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
514 ///
515 /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential
516 /// order.
517 /// \param[in] auxNTuple The name and location of the RNTuple to join the primary RNTuple with. The order in which
518 /// its entries are processed is determined by the primary RNTuple and doesn't necessarily have to be sequential.
519 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
520 /// The join is made based on the combined join field values, and therefore each field has to be present in each
521 /// specified RNTuple. If an empty list is provided, it is assumed that the specified ntuples are fully aligned.
522 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary RNTuple is used.
523 ///
524 /// \return A pointer to the newly created RNTupleProcessor.
525 static std::unique_ptr<RNTupleProcessor> CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple,
526 const std::vector<std::string> &joinFields,
527 std::string_view processorName = "");
528
529 /////////////////////////////////////////////////////////////////////////////
530 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
531 ///
532 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
533 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
534 /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
535 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
536 /// The join is made based on the combined join field values, and therefore each field has to be present in each
537 /// specified processor. If an empty list is provided, it is assumed that the specified processors are fully
538 /// aligned.
539 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary processor is used.
540 ///
541 /// \return A pointer to the newly created RNTupleProcessor.
542 static std::unique_ptr<RNTupleProcessor>
543 CreateJoin(std::unique_ptr<RNTupleProcessor> primaryProcessor, std::unique_ptr<RNTupleProcessor> auxProcessor,
544 const std::vector<std::string> &joinFields, std::string_view processorName = "");
545};
546
547// clang-format off
548/**
549\class ROOT::Experimental::RNTupleSingleProcessor
550\ingroup NTuple
551\brief Processor specialization for processing a single RNTuple.
552*/
553// clang-format on
555 friend class RNTupleProcessor;
556
557private:
559 std::unique_ptr<ROOT::Internal::RPageSource> fPageSource;
560
561 /////////////////////////////////////////////////////////////////////////////
562 /// \brief Create a new field and connect it to the processor's page source.
563 ///
564 /// \param[in] qualifiedFieldName Name of the field to add, prefixed with its parent fields, if applicable.
565 /// \param[in] typeName Type of the field to add.
566 ///
567 /// \return The newly created field.
568 /// \throws ROOT::RException In case the requested field cannot be found on disk.
569 std::unique_ptr<ROOT::RFieldBase>
570 CreateAndConnectField(const std::string &qualifiedFieldName, const std::string &typeName);
571
572 /////////////////////////////////////////////////////////////////////////////
573 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
574 ///
575 /// At this point, the page source for the underlying RNTuple of the processor will be created and opened.
576 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
577
578 /////////////////////////////////////////////////////////////////////////////
579 /// \brief Connect the provided field indices in the entry to their on-disk fields.
580 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
582 bool updateFields = false) final;
583
584 /////////////////////////////////////////////////////////////////////////////
585 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
586 /// processor).
587 ///
588 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
590
591 /////////////////////////////////////////////////////////////////////////////
592 /// \brief Get the total number of entries in this processor.
600
601 /////////////////////////////////////////////////////////////////////////////
602 /// \brief Check if a field exists on-disk and can be read by the processor.
603 ///
604 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
605 bool CanReadFieldFromDisk(std::string_view fieldName) final;
606
607 /////////////////////////////////////////////////////////////////////////////
608 /// \brief Add a field to the entry.
609 ///
610 /// \sa RNTupleProcessor::AddFieldToEntry()
612 const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
614
615 /////////////////////////////////////////////////////////////////////////////
616 /// \brief Add the entry mappings for this processor to the provided join table.
617 ///
618 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
619 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
620
621 /////////////////////////////////////////////////////////////////////////////
622 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
623 ///
624 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
625 void PrintStructureImpl(std::ostream &output) const final;
626
627 /////////////////////////////////////////////////////////////////////////////
628 /// \brief Construct a new RNTupleProcessor for processing a single RNTuple.
629 ///
630 /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process.
631 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is
632 /// the name of the underlying RNTuple.
634
635public:
641 {
642 // The entry's fields need to be deleted before fPageSource.
643 if (fEntry)
644 fEntry->Clear();
645 };
646};
647
648// clang-format off
649/**
650\class ROOT::Experimental::RNTupleChainProcessor
651\ingroup NTuple
652\brief Processor specialization for vertically combined (*chained*) RNTupleProcessors.
653*/
654// clang-format on
656 friend class RNTupleProcessor;
657
658private:
659 std::vector<std::unique_ptr<RNTupleProcessor>> fInnerProcessors;
660 std::vector<ROOT::NTupleSize_t> fInnerNEntries;
661
663
664 /////////////////////////////////////////////////////////////////////////////
665 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
666 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
667
668 /////////////////////////////////////////////////////////////////////////////
669 /// \brief Connect the provided field indices in the entry to their on-disk fields.
670 ///
671 /// \sa RNTupleProcessor::Connect()
672 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
674 bool updateFields = false) final;
675
676 /////////////////////////////////////////////////////////////////////////////
677 /// \brief Update the entry to reflect any missing fields in the current inner processor.
678 void ConnectInnerProcessor(std::size_t processorNumber);
679
680 /////////////////////////////////////////////////////////////////////////////
681 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
682 /// processor).
683 ///
684 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
686
687 /////////////////////////////////////////////////////////////////////////////
688 /// \brief Get the total number of entries in this processor.
689 ///
690 /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly!
692
693 /////////////////////////////////////////////////////////////////////////////
694 /// \brief Check if a field exists on-disk and can be read by the processor.
695 ///
696 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
697 bool CanReadFieldFromDisk(std::string_view fieldName) final
698 {
699 return fInnerProcessors[fCurrentProcessorNumber]->CanReadFieldFromDisk(fieldName);
700 }
701
702 /////////////////////////////////////////////////////////////////////////////
703 /// \brief Add a field to the entry.
704 ///
705 /// \sa RNTupleProcessor::AddFieldToEntry()
707 const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
709
710 /////////////////////////////////////////////////////////////////////////////
711 /// \brief Add the entry mappings for this processor to the provided join table.
712 ///
713 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
714 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
715
716 /////////////////////////////////////////////////////////////////////////////
717 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
718 ///
719 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
720 void PrintStructureImpl(std::ostream &output) const final;
721
722 /////////////////////////////////////////////////////////////////////////////
723 /// \brief Construct a new RNTupleChainProcessor.
724 ///
725 /// \param[in] ntuples The source specification (name and storage location) for each RNTuple to process.
726 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this
727 /// is the name of the first inner processor.
728 ///
729 /// RNTuples are processed in the order in which they are specified.
730 RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName);
731
732public:
738};
739
740// clang-format off
741/**
742\class ROOT::Experimental::RNTupleJoinProcessor
743\ingroup NTuple
744\brief Processor specialization for horizontally combined (*joined*) RNTupleProcessors.
745*/
746// clang-format on
748 friend class RNTupleProcessor;
749
750private:
751 std::unique_ptr<RNTupleProcessor> fPrimaryProcessor;
752 std::unique_ptr<RNTupleProcessor> fAuxiliaryProcessor;
753
754 std::vector<std::string> fJoinFieldNames;
755 std::set<Internal::RNTupleProcessorEntry::FieldIndex_t> fJoinFieldIdxs;
756
757 std::unique_ptr<Internal::RNTupleJoinTable> fJoinTable;
758 bool fJoinTableIsBuilt = false;
759
760 std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fAuxiliaryFieldIdxs;
761
762 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
763 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
764
765 /////////////////////////////////////////////////////////////////////////////
766 /// \brief Connect the provided field indices in the entry to their on-disk fields.
767 ///
768 /// \sa RNTupleProcessor::Connect()
769 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
771 bool updateFields = false) final;
772
773 /////////////////////////////////////////////////////////////////////////////
774 /// \brief Load the entry identified by the provided entry number of the primary processor.
775 ///
776 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
778
779 /////////////////////////////////////////////////////////////////////////////
780 /// \brief Get the total number of entries in this processor.
782
783 /////////////////////////////////////////////////////////////////////////////
784 /// \brief Set the validity for all fields in the auxiliary processor at once.
785 void SetAuxiliaryFieldValidity(bool validity);
786
787 /////////////////////////////////////////////////////////////////////////////
788 /// \brief Check if a field exists on-disk and can be read by the processor.
789 ///
790 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
791 bool CanReadFieldFromDisk(std::string_view fieldName) final
792 {
793 if (!fPrimaryProcessor->CanReadFieldFromDisk(fieldName)) {
794 if (fieldName.find(fAuxiliaryProcessor->GetProcessorName()) == 0)
795 fieldName = fieldName.substr(fAuxiliaryProcessor->GetProcessorName().size() + 1);
796 return fAuxiliaryProcessor->CanReadFieldFromDisk(fieldName);
797 }
798
799 return true;
800 }
801
802 /////////////////////////////////////////////////////////////////////////////
803 /// \brief Add a field to the entry.
804 ///
805 /// \sa RNTupleProcessor::AddFieldToEntry()
807 const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
809
810 /////////////////////////////////////////////////////////////////////////////
811 /// \brief Add the entry mappings for this processor to the provided join table.
812 ///
813 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
814 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
815
816 /////////////////////////////////////////////////////////////////////////////
817 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
818 ///
819 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
820 void PrintStructureImpl(std::ostream &output) const final;
821
822 /////////////////////////////////////////////////////////////////////////////
823 /// \brief Construct a new RNTupleJoinProcessor.
824 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
825 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
826 /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
827 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
828 /// The join is made based on the combined join field values, and therefore each field has to be present in each
829 /// specified processor. If an empty list is provided, it is assumed that the processors are fully aligned.
830 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this
831 /// is the name of the primary processor.
833 std::unique_ptr<RNTupleProcessor> auxProcessor, const std::vector<std::string> &joinFields,
834 std::string_view processorName);
835
836public:
842};
843
844} // namespace Experimental
845} // namespace ROOT
846
847#endif // ROOT_RNTupleProcessor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult<T>.
Definition RError.hxx:299
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples.
Collection of values in an RNTupleProcessor, analogous to REntry, with checks and support for missing...
void BindRawPtr(FieldIndex_t fieldIdx, void *valuePtr)
Bind a new value pointer to a field in the entry.
const ROOT::RFieldBase::RValue & GetValue(FieldIndex_t fieldIdx) const
bool IsValidField(FieldIndex_t fieldIdx) const
Check whether a field is valid for reading.
const std::string & FindFieldName(FieldIndex_t fieldIdx) const
Find the name of a field from its field index.
Processor specialization for vertically combined (chained) RNTupleProcessors.
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
void PrintStructureImpl(std::ostream &output) const final
Processor-specific implementation for printing its structure, called by PrintStructure().
void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0) final
Add the entry mappings for this processor to the provided join table.
void ConnectInnerProcessor(std::size_t processorNumber)
Update the entry to reflect any missing fields in the current inner processor.
Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr=nullptr, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance()) final
Add a field to the entry.
Internal::RNTupleProcessorProvenance fProvenance
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry=nullptr) final
Initialize the processor by creating an (initially empty) fEntry, or setting an existing one.
std::vector< ROOT::NTupleSize_t > fInnerNEntries
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided fields indices in the entry to their on-disk fields.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this processor).
std::vector< std::unique_ptr< RNTupleProcessor > > fInnerProcessors
Processor specialization for horizontally combined (joined) RNTupleProcessors.
std::set< Internal::RNTupleProcessorEntry::FieldIndex_t > fJoinFieldIdxs
std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > fAuxiliaryFieldIdxs
std::unique_ptr< RNTupleProcessor > fPrimaryProcessor
std::unique_ptr< Internal::RNTupleJoinTable > fJoinTable
std::unique_ptr< RNTupleProcessor > fAuxiliaryProcessor
Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
RNTupleOpenSpec(std::string_view n, const std::string &s)
std::variant< std::string, TDirectory * > fStorage
RNTupleOpenSpec(std::string_view n, TDirectory *s)
std::unique_ptr< ROOT::Internal::RPageSource > CreatePageSource() const
RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
void BindRawPtr(void *valuePtr)
Bind the value to valuePtr.
void * GetRawPtr() const
Get a non-owning pointer to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex
std::shared_ptr< void > GetPtr() const
Get the pointer to the field value managed by the processor's entry.
bool HasValue() const
Check if the pointer currently holds a valid value.
std::shared_ptr< T > GetPtr() const
Get a shared pointer to the field value managed by the processor's entry.
const T & operator*() const
Get a reference to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex
const T * operator->() const
Access the field value managed by the processor's entry.
void BindRawPtr(T *valuePtr)
Bind the value to valuePtr.
bool HasValue() const
Check if the pointer currently holds a valid value.
T * GetRawPtr() const
Get a non-owning pointer to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry * fProcessorEntry
RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
Identifies how a processor is composed.
Iterator over the entries of an RNTuple, or vertical concatenation thereof.
friend bool operator==(const iterator &lh, const iterator &rh)
friend bool operator!=(const iterator &lh, const iterator &rh)
RIterator(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber)
Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combined RNTuples.
virtual bool CanReadFieldFromDisk(std::string_view fieldName)=0
Check if a field exists on-disk and can be read by the processor.
const std::string & GetProcessorName() const
Get the name of the processor.
RNTupleProcessorOptionalPtr< T > RequestField(const std::string &fieldName, void *valuePtr=nullptr)
Request access to a field for reading during processing.
virtual ROOT::NTupleSize_t GetNEntries()=0
Get the total number of entries in this processor.
static std::unique_ptr< RNTupleProcessor > CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple, const std::vector< std::string > &joinFields, std::string_view processorName="")
Create an RNTupleProcessor for a join (i.e., a horizontal combination) of RNTuples.
ROOT::NTupleSize_t fNEntries
Total number of entries.
friend struct ROOT::Experimental::Internal::RNTupleProcessorEntryLoader
static std::unique_ptr< RNTupleProcessor > CreateChain(std::vector< RNTupleOpenSpec > ntuples, std::string_view processorName="")
Create an RNTupleProcessor for a chain (i.e., a vertical combination) of RNTuples.
RNTupleProcessor(RNTupleProcessor &&)=delete
std::shared_ptr< Internal::RNTupleProcessorEntry > fEntry
virtual void PrintStructureImpl(std::ostream &output) const =0
Processor-specific implementation for printing its structure, called by PrintStructure().
virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber)=0
Load the entry identified by the provided entry number.
ROOT::NTupleSize_t GetCurrentEntryNumber() const
Get the entry number that is currently being processed.
virtual void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance, bool updateFields)=0
Connect fields to the page source of the processor's underlying RNTuple(s).
std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > fFieldIdxs
virtual void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry)=0
Initialize the processor by creating an (initially empty) fEntry, or setting an existing one.
bool IsInitialized() const
Check if the processor already has been initialized.
virtual void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0)=0
Add the entry mappings for this processor to the provided join table.
std::size_t GetCurrentProcessorNumber() const
Get the number of the inner processor currently being read.
virtual Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr, const Internal::RNTupleProcessorProvenance &provenance)=0
Add a field to the entry.
void PrintStructure(std::ostream &output=std::cout)
Print a graphical representation of the processor composition.
ROOT::NTupleSize_t GetNEntriesProcessed() const
Get the total number of entries processed so far.
RNTupleProcessor(const RNTupleProcessor &)=delete
RNTupleProcessor & operator=(RNTupleProcessor &&)=delete
RNTupleProcessor(std::string_view processorName)
Create a new base RNTupleProcessor.
static std::unique_ptr< RNTupleProcessor > Create(RNTupleOpenSpec ntuple, std::string_view processorName="")
Create an RNTupleProcessor for a single RNTuple.
RNTupleProcessor & operator=(const RNTupleProcessor &)=delete
Processor specialization for processing a single RNTuple.
void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0) final
Add the entry mappings for this processor to the provided join table.
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided fields indices in the entry to their on-disk fields.
void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry=nullptr) final
Initialize the processor by creating an (initially empty) fEntry, or setting an existing one.
void PrintStructureImpl(std::ostream &output) const final
Processor-specific implementation for printing its structure, called by PrintStructure().
std::unique_ptr< ROOT::Internal::RPageSource > fPageSource
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this processor).
Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr=nullptr, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance()) final
Add a field to the entry.
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
std::unique_ptr< ROOT::RFieldBase > CreateAndConnectField(const std::string &qualifiedFieldName, const std::string &typeName)
Create a new field and connect it to the processor's page source.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Describe directory structure in memory.
Definition TDirectory.h:45
const Int_t n
Definition legend1.C:16
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert std::prefix.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.