Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleProcessor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleProcessor.hxx
2/// \author Florine de Geus <florine.de.geus@cern.ch>
3/// \date 2024-03-26
4/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
5/// is welcome!
6
7/*************************************************************************
8 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#ifndef ROOT_RNTupleProcessor
16#define ROOT_RNTupleProcessor
17
18#include <ROOT/REntry.hxx>
19#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleModel.hxx>
23#include <ROOT/RNTupleTypes.hxx>
25#include <ROOT/RPageStorage.hxx>
26
27#include <memory>
28#include <string>
29#include <string_view>
30#include <vector>
31
32namespace ROOT {
33namespace Experimental {
34
35namespace Internal {
36struct RNTupleProcessorEntryLoader;
37} // namespace Internal
38
39// clang-format off
40/**
41\class ROOT::Experimental::RNTupleOpenSpec
42\ingroup NTuple
43\brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
44
45An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the
46TDirectory (or any of its subclasses) that contains the RNTuple.
47
48Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python.
49*/
50// clang-format on
52 friend class RNTupleProcessor;
55
56private:
57 std::string fNTupleName;
58 std::variant<std::string, TDirectory *> fStorage;
59
60public:
/// \brief Create a specification for an RNTuple that lives in a TDirectory.
///
/// \param[in] n Name of the RNTuple.
/// \param[in] s Directory containing the RNTuple. Only the pointer is stored in `fStorage`.
61 RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {}
/// \brief Create a specification for an RNTuple that lives in a ROOT file identified by a path string.
///
/// \param[in] n Name of the RNTuple.
/// \param[in] s Storage location string. A copy is kept in `fStorage`.
62 RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {}
63
64 std::unique_ptr<ROOT::Internal::RPageSource> CreatePageSource() const;
65};
66
67// clang-format off
68/**
69\class ROOT::Experimental::RNTupleProcessorOptionalPtr<T>
70\ingroup NTuple
71\brief The RNTupleProcessorOptionalPtr provides access to values from fields present in an RNTupleProcessor, with support
72and checks for missing values.
73*/
74// clang-format on
75template <typename T>
77 friend class RNTupleProcessor;
78
79private:
82
88
89public:
90 /////////////////////////////////////////////////////////////////////////////
91 /// \brief Check if the pointer currently holds a valid value.
93
94 /////////////////////////////////////////////////////////////////////////////
95 /// \brief Get a shared pointer to the field value managed by the processor's entry.
96 ///
97 /// \return A `std::shared_ptr<T>` if the field is valid in the current entry, or a `nullptr` otherwise.
98 std::shared_ptr<T> GetPtr() const
99 {
102 return value.template GetPtr<T>();
103 }
104
105 return nullptr;
106 }
107
108 /////////////////////////////////////////////////////////////////////////////
109 /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
110 ///
111 /// \return A `T*` if the field is valid in the current entry, or a `nullptr` otherwise.
// GetPtr() returns a shared_ptr by value; the temporary is released at the end of this expression, so the raw
// pointer handed out here does not extend the lifetime of the value.
112 T *GetRawPtr() const { return GetPtr().get(); }
113
114 /////////////////////////////////////////////////////////////////////////////
115 /// \brief Bind the value to `valuePtr`.
116 ///
117 /// \param[in] valuePtr Pointer to bind the value to.
118 ///
119 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
120 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
121 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
122 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
123 /// its data through this interface, to ensure that only valid data can be read.
125
126 /////////////////////////////////////////////////////////////////////////////
127 /// \brief Get a reference to the field value managed by the processor's entry.
128 ///
129 /// Throws an exception if the field is invalid in the processor's current entry.
130 const T &operator*() const
131 {
// GetPtr() yields a null shared_ptr when the field has no valid value, which selects the throwing branch.
132 if (auto ptr = GetPtr())
// NOTE(review): the returned reference outlives the local `ptr`; this presumably relies on the processor's entry
// keeping the value alive -- confirm.
133 return *ptr;
134 else
// The field name is looked up from its index so that the error message identifies the unreadable field.
135 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
136 "\" because it has no value for the current entry"));
137 }
138
139 /////////////////////////////////////////////////////////////////////////////
140 /// \brief Access the field value managed by the processor's entry.
141 ///
142 /// Throws an exception if the field is invalid in the processor's current entry.
143 const T *operator->() const
144 {
// GetPtr() yields a null shared_ptr when the field has no valid value, which selects the throwing branch.
145 if (auto ptr = GetPtr())
// NOTE(review): the raw pointer outlives the local `ptr`; this presumably relies on the processor's entry keeping
// the value alive -- confirm.
146 return ptr.get();
147 else
// The field name is looked up from its index so that the error message identifies the unreadable field.
148 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
149 "\" because it has no value for the current entry"));
150 }
151};
152
153// clang-format off
154/**
155\class ROOT::Experimental::RNTupleProcessorOptionalPtr<void>
156\ingroup NTuple
157\brief Specialization of RNTupleProcessorOptionalPtr<T> for `void`-type pointers.
158*/
159// clang-format on
160template <>
162 friend class RNTupleProcessor;
163
164private:
167
173
174public:
175 /////////////////////////////////////////////////////////////////////////////
176 /// \brief Check if the pointer currently holds a valid value.
178
179 /////////////////////////////////////////////////////////////////////////////
180 /// \brief Get the pointer to the field value managed by the processor's entry.
181 ///
182 /// \return A `std::shared_ptr<void>` if the field is valid in the current entry, or a `nullptr` otherwise.
183 std::shared_ptr<void> GetPtr() const
184 {
187 return value.template GetPtr<void>();
188 }
189
190 return nullptr;
191 }
192
193 /////////////////////////////////////////////////////////////////////////////
194 /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
195 ///
196 /// \return A `void*` if the field is valid in the current entry, or a `nullptr` otherwise.
// GetPtr() returns a shared_ptr by value; the temporary is released at the end of this expression, so the raw
// pointer handed out here does not extend the lifetime of the value.
197 void *GetRawPtr() const { return GetPtr().get(); }
198
199 /////////////////////////////////////////////////////////////////////////////
200 /// \brief Bind the value to `valuePtr`.
201 ///
202 /// \param[in] valuePtr Pointer to bind the value to.
203 ///
204 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
205 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
206 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
207 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
208 /// its data through this interface, to ensure that only valid data can be read.
210};
211
212// clang-format off
213/**
214\class ROOT::Experimental::RNTupleProcessor
215\ingroup NTuple
216\brief Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combined RNTuples.
217
218Example usage (see ntpl012_processor_chain.C and ntpl015_processor_join.C for bigger examples):
219
220~~~{.cpp}
221#include <ROOT/RNTupleProcessor.hxx>
222using ROOT::Experimental::RNTupleProcessor;
223using ROOT::Experimental::RNTupleOpenSpec;
224
225std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
226auto processor = RNTupleProcessor::CreateChain(ntuples);
227
228auto pt = processor->RequestField<float>("pt");
229
230for (const auto idx : *processor) {
231 std::cout << "event = " << idx << ", pt = " << *pt << std::endl;
232}
233~~~
234
235An RNTupleProcessor is created either:
2361. By providing one or more RNTupleOpenSpecs, each of which contains the name and storage location of a single RNTuple;
2372. By providing a previously created RNTupleProcessor.
238
239The RNTupleProcessor provides an iterator which gives access to the index of the current *global* entry of the
240processor, i.e. taking into account previously processed RNTuples.
241
242Because the schemas of each RNTuple that are part of an RNTupleProcessor may not necessarily be identical, or because
243it can occur that entries are only partially complete in a join-based processor, field values may be marked as
244"invalid", at which point their data should not be read. This is handled by the RNTupleProcessorOptionalPtr
245that is returned by RequestField().
246*/
247// clang-format on
253
254protected:
255 std::string fProcessorName;
256 std::shared_ptr<Internal::RNTupleProcessorEntry> fEntry = nullptr;
257 std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fFieldIdxs;
258
259 /// Total number of entries. Only to be used internally by the processor, not meant to be exposed in the public
260 /// interface.
262
263 ROOT::NTupleSize_t fNEntriesProcessed = 0; ///< Total number of entries processed so far
264 ROOT::NTupleSize_t fCurrentEntryNumber = 0; ///< Current processor entry number
265 std::size_t fCurrentProcessorNumber = 0; ///< Number of the currently open inner processor
266
267 /////////////////////////////////////////////////////////////////////////////
268 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
269 virtual void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry) = 0;
270
271 /////////////////////////////////////////////////////////////////////////////
272 /// \brief Check if the processor already has been initialized.
// `fEntry` is default-initialized to nullptr, so a non-null entry is used as the "initialized" marker.
273 bool IsInitialized() const { return fEntry != nullptr; }
274
275 /////////////////////////////////////////////////////////////////////////////
276 /// \brief Connect fields to the page source of the processor's underlying RNTuple(s).
277 ///
278 /// \param[in] fieldIdxs Indices of the fields to connect.
279 /// \param[in] provenance Provenance of the processor.
280 /// \param[in] updateFields Whether the fields in the entry need to be updated, because the current underlying
281 /// RNTuple source changed.
282 virtual void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
284
285 /////////////////////////////////////////////////////////////////////////////
286 /// \brief Load the entry identified by the provided entry number.
287 ///
288 /// \param[in] entryNumber Entry number to load
289 ///
290 /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise.
292
293 /////////////////////////////////////////////////////////////////////////////
294 /// \brief Get the total number of entries in this processor
296
297 /////////////////////////////////////////////////////////////////////////////
298 /// \brief Check if a field exists on-disk and can be read by the processor.
299 ///
300 /// \param[in] fieldName Name of the field to check.
301 virtual bool CanReadFieldFromDisk(std::string_view fieldName) = 0;
302
303 /////////////////////////////////////////////////////////////////////////////
304 /// \brief Add a field to the entry.
305 ///
306 ///
307 /// \param[in] fieldName Name of the field to add.
308 /// \param[in] typeName Type of the field to add.
309 /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
310 /// created.
311 /// \param[in] provenance Provenance of the processor.
312 ///
313 /// \return The index of the newly added field in the entry.
314 ///
315 /// In case the field was already present in the entry, the index of the existing field is returned.
317 AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr,
319
320 /////////////////////////////////////////////////////////////////////////////
321 /// \brief Add the entry mappings for this processor to the provided join table.
322 ///
323 /// \param[in] joinTable the join table to map the entries to.
324 /// \param[in] entryOffset In case the entry mapping is added from a chain, the offset of the entry indexes to use
325 /// with respect to the processor's position in the chain.
327
328 /////////////////////////////////////////////////////////////////////////////
329 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
330 ///
331 /// \param[in,out] output Output stream to print to.
332 virtual void PrintStructureImpl(std::ostream &output) const = 0;
333
334 /////////////////////////////////////////////////////////////////////////////
335 /// \brief Create a new base RNTupleProcessor.
336 ///
337 /// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for
338 /// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary
339 /// RNTuple for RNTupleJoinProcessor.
341
342public:
347 virtual ~RNTupleProcessor() = default;
348
349 /////////////////////////////////////////////////////////////////////////////
350 /// \brief Get the total number of entries processed so far.
352
353 /////////////////////////////////////////////////////////////////////////////
354 /// \brief Get the entry number that is currently being processed.
356
357 /////////////////////////////////////////////////////////////////////////////
358 /// \brief Get the number of the inner processor currently being read.
359 ///
360 /// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned.
362
363 /////////////////////////////////////////////////////////////////////////////
364 /// \brief Get the name of the processor.
365 ///
366 /// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying
367 /// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the
368 /// primary processor for RNTupleJoinProcessor.
// Returns a reference to the `fProcessorName` member (no copy); it is only valid while this processor is alive.
369 const std::string &GetProcessorName() const { return fProcessorName; }
370
371 /////////////////////////////////////////////////////////////////////////////
372 /// \brief Request access to a field for reading during processing.
373 ///
374 /// \tparam T Type of the requested field.
375 ///
376 /// \param[in] fieldName Name of the requested field.
377 /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
378 /// created.
379 ///
380 /// \return An RNTupleProcessorOptionalPtr of type `T`, which provides access to the field's value.
381 ///
382 /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry during processing, for
383 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
384 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
385 /// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through
386 /// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read.
387 template <typename T>
389 {
391 std::string typeName{};
392 if constexpr (!std::is_void_v<T>) {
393 typeName = ROOT::Internal::GetRenormalizedTypeName(typeid(T));
394 }
397 }
398
399 /////////////////////////////////////////////////////////////////////////////
400 /// \brief Request access to a field for reading during processing.
401 ///
402 /// \param[in] fieldName Name of the requested field.
403 /// \param[in] typeName Type of the requested field.
404 /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
405 /// created.
406 ///
407 /// \return A void-type RNTupleProcessorOptionalPtr, which provides access to the field's value.
408 ///
409 /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry during processing, for
410 /// example when a field is not present in one of the chained processors or when during a join operation, no matching
411 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
412 /// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through
413 /// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read.
415 RequestField(const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr)
416 {
420 }
421
422 /////////////////////////////////////////////////////////////////////////////
423 /// \brief Print a graphical representation of the processor composition.
424 ///
425 /// \param[in,out] output Stream to print to (default is stdout).
426 ///
427 /// ### Example:
428 /// The structure of a processor representing a join between a single primary RNTuple and a chain of two auxiliary
429 /// RNTuples will be printed as follows:
430 /// ~~~
431 /// +-----------------------------+ +-----------------------------+
432 /// | ntuple | | ntuple_aux |
433 /// | ntuple.root | | ntuple_aux1.root |
434 /// +-----------------------------+ +-----------------------------+
435 /// +-----------------------------+
436 /// | ntuple_aux |
437 /// | ntuple_aux2.root |
438 /// +-----------------------------+
439 /// ~~~
// Public entry point that forwards to the virtual PrintStructureImpl() of the concrete processor.
// NOTE(review): this method is non-const although PrintStructureImpl() is declared const -- confirm whether
// that is intentional.
440 void PrintStructure(std::ostream &output = std::cout) { PrintStructureImpl(output); }
441
442 // clang-format off
443 /**
444 \class ROOT::Experimental::RNTupleProcessor::RIterator
445 \ingroup NTuple
446 \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof.
447 */
448 // clang-format on
449 class RIterator {
450 private:
453
454 public:
455 using iterator_category = std::input_iterator_tag;
458 using difference_type = std::ptrdiff_t;
461
464 {
465 if (!fProcessor.fEntry) {
467 }
468 // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already
469 // know there is nothing to load.
472 /*updateFields=*/false);
474 }
475 }
476
482
484 {
485 auto obj = *this;
486 ++(*this);
487 return obj;
488 }
489
491
492 friend bool operator!=(const iterator &lh, const iterator &rh)
493 {
// Only the entry numbers are compared; the processor each iterator refers to is not taken into account.
494 return lh.fCurrentEntryNumber != rh.fCurrentEntryNumber;
495 }
496 friend bool operator==(const iterator &lh, const iterator &rh)
497 {
// Mirrors operator!=: two iterators are equal exactly when their current entry numbers are equal.
498 return lh.fCurrentEntryNumber == rh.fCurrentEntryNumber;
499 }
500 };
501
// Creates an iterator over this processor, positioned at entry number 0.
502 RIterator begin() { return RIterator(*this, 0); }
504
505 /////////////////////////////////////////////////////////////////////////////
506 /// \brief Create an RNTupleProcessor for a single RNTuple.
507 ///
508 /// \param[in] ntuple The name and storage location of the RNTuple to process.
509 /// \param[in] processorName The name to give to the processor. If empty, the name of the input RNTuple is used.
510 ///
511 /// \return A pointer to the newly created RNTupleProcessor.
512 static std::unique_ptr<RNTupleProcessor> Create(RNTupleOpenSpec ntuple, std::string_view processorName = "");
513
514 /////////////////////////////////////////////////////////////////////////////
515 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
516 ///
517 /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process.
518 /// \param[in] processorName The name to give to the processor. If empty, the name of the first RNTuple is used.
519 ///
520 /// \return A pointer to the newly created RNTupleProcessor.
521 static std::unique_ptr<RNTupleProcessor>
522 CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::string_view processorName = "");
523
524 /////////////////////////////////////////////////////////////////////////////
525 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors.
526 ///
527 /// \param[in] innerProcessors A list with the processors to chain.
528 /// \param[in] processorName The name to give to the processor. If empty, the name of the first inner processor is
529 /// used.
530 ///
531 /// \return A pointer to the newly created RNTupleProcessor.
532 static std::unique_ptr<RNTupleProcessor>
533 CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors, std::string_view processorName = "");
534
535 /////////////////////////////////////////////////////////////////////////////
536 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
537 ///
538 /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential
539 /// order.
540 /// \param[in] auxNTuple The name and location of the RNTuple to join the primary RNTuple with. The order in which
541 /// its entries are processed is determined by the primary RNTuple and doesn't necessarily have to be sequential.
542 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
543 /// The join is made based on the combined join field values, and therefore each field has to be present in each
544 /// specified RNTuple. If an empty list is provided, it is assumed that the specified RNTuples are fully aligned.
545 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary RNTuple is used.
546 ///
547 /// \return A pointer to the newly created RNTupleProcessor.
548 static std::unique_ptr<RNTupleProcessor> CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple,
549 const std::vector<std::string> &joinFields,
550 std::string_view processorName = "");
551
552 /////////////////////////////////////////////////////////////////////////////
553 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
554 ///
555 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
556 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
557 /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
558 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
559 /// The join is made based on the combined join field values, and therefore each field has to be present in each
560 /// specified processor. If an empty list is provided, it is assumed that the specified processors are fully
561 /// aligned.
562 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary processor is used.
563 ///
564 /// \return A pointer to the newly created RNTupleProcessor.
565 static std::unique_ptr<RNTupleProcessor>
566 CreateJoin(std::unique_ptr<RNTupleProcessor> primaryProcessor, std::unique_ptr<RNTupleProcessor> auxProcessor,
567 const std::vector<std::string> &joinFields, std::string_view processorName = "");
568};
569
570// clang-format off
571/**
572\class ROOT::Experimental::RNTupleSingleProcessor
573\ingroup NTuple
574\brief Processor specialization for processing a single RNTuple.
575*/
576// clang-format on
578 friend class RNTupleProcessor;
579
580private:
582 std::unique_ptr<ROOT::Internal::RPageSource> fPageSource;
583
584 /////////////////////////////////////////////////////////////////////////////
585 /// \brief Create a new field and connect it to the processor's page source.
586 ///
587 /// \param[in] qualifiedFieldName Name of the field to add, prefixed with its parent fields, if applicable.
588 /// \param[in] typeName Type of the field to add.
589 ///
590 /// \return The newly created field.
591 /// \throws ROOT::RException In case the requested field cannot be found on disk.
592 std::unique_ptr<ROOT::RFieldBase>
593 CreateAndConnectField(const std::string &qualifiedFieldName, const std::string &typeName);
594
595 /////////////////////////////////////////////////////////////////////////////
596 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
597 ///
598 /// At this point, the page source for the underlying RNTuple of the processor will be created and opened.
599 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
600
601 /////////////////////////////////////////////////////////////////////////////
602 /// \brief Connect the provided field indices in the entry to their on-disk fields.
603 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
605 bool updateFields = false) final;
606
607 /////////////////////////////////////////////////////////////////////////////
608 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
609 /// processor).
610 ///
611 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
613
614 /////////////////////////////////////////////////////////////////////////////
615 /// \brief Get the total number of entries in this processor.
622
623 /////////////////////////////////////////////////////////////////////////////
624 /// \brief Check if a field exists on-disk and can be read by the processor.
625 ///
626 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
627 bool CanReadFieldFromDisk(std::string_view fieldName) final;
628
629 /////////////////////////////////////////////////////////////////////////////
630 /// \brief Add a field to the entry.
631 ///
632 /// \sa RNTupleProcessor::AddFieldToEntry()
634 const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
636
637 /////////////////////////////////////////////////////////////////////////////
638 /// \brief Add the entry mappings for this processor to the provided join table.
639 ///
640 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
641 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
642
643 /////////////////////////////////////////////////////////////////////////////
644 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
645 ///
646 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
647 void PrintStructureImpl(std::ostream &output) const final;
648
649 /////////////////////////////////////////////////////////////////////////////
650 /// \brief Construct a new RNTupleProcessor for processing a single RNTuple.
651 ///
652 /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process.
653 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is
654 /// the name of the underlying RNTuple.
656
657public:
663 {
664 // The entry's fields need to be deleted before fPageSource.
665 if (fEntry)
666 fEntry->Clear();
667 };
668};
669
670// clang-format off
671/**
672\class ROOT::Experimental::RNTupleChainProcessor
673\ingroup NTuple
674\brief Processor specialization for vertically combined (*chained*) RNTupleProcessors.
675*/
676// clang-format on
678 friend class RNTupleProcessor;
679
680private:
681 std::vector<std::unique_ptr<RNTupleProcessor>> fInnerProcessors;
682 std::vector<ROOT::NTupleSize_t> fInnerNEntries;
683
685
686 /////////////////////////////////////////////////////////////////////////////
687 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
688 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
689
690 /////////////////////////////////////////////////////////////////////////////
691 /// \brief Connect the provided field indices in the entry to their on-disk fields.
692 ///
693 /// \sa RNTupleProcessor::Connect()
694 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
696 bool updateFields = false) final;
697
698 /////////////////////////////////////////////////////////////////////////////
699 /// \brief Update the entry to reflect any missing fields in the current inner processor.
700 void ConnectInnerProcessor(std::size_t processorNumber);
701
702 /////////////////////////////////////////////////////////////////////////////
703 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
704 /// processor).
705 ///
706 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
708
709 /////////////////////////////////////////////////////////////////////////////
710 /// \brief Get the total number of entries in this processor.
711 ///
712 /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly!
714
715 /////////////////////////////////////////////////////////////////////////////
716 /// \brief Check if a field exists on-disk and can be read by the processor.
717 ///
718 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
719 bool CanReadFieldFromDisk(std::string_view fieldName) final
720 {
// Only the inner processor selected by fCurrentProcessorNumber is consulted, not every processor in the chain.
// NOTE(review): the index is not bounds-checked here; presumably fInnerProcessors is never empty -- confirm.
721 return fInnerProcessors[fCurrentProcessorNumber]->CanReadFieldFromDisk(fieldName);
722 }
723
724 /////////////////////////////////////////////////////////////////////////////
725 /// \brief Add a field to the entry.
726 ///
727 /// \sa RNTupleProcessor::AddFieldToEntry()
729 const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
731
732 /////////////////////////////////////////////////////////////////////////////
733 /// \brief Add the entry mappings for this processor to the provided join table.
734 ///
735 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
736 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
737
738 /////////////////////////////////////////////////////////////////////////////
739 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
740 ///
741 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
742 void PrintStructureImpl(std::ostream &output) const final;
743
744 /////////////////////////////////////////////////////////////////////////////
745 /// \brief Construct a new RNTupleChainProcessor.
746 ///
747 /// \param[in] processors The inner processors to chain.
748 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this
749 /// is the name of the first inner processor.
750 ///
751 /// The inner processors are processed in the order in which they are specified.
752 RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName);
753
754public:
760};
761
762// clang-format off
763/**
764\class ROOT::Experimental::RNTupleJoinProcessor
765\ingroup NTuple
766\brief Processor specialization for horizontally combined (*joined*) RNTupleProcessors.
767*/
768// clang-format on
770 friend class RNTupleProcessor;
771
772private:
773 std::unique_ptr<RNTupleProcessor> fPrimaryProcessor;
774 std::unique_ptr<RNTupleProcessor> fAuxiliaryProcessor;
775
776 std::vector<std::string> fJoinFieldNames;
777 std::set<Internal::RNTupleProcessorEntry::FieldIndex_t> fJoinFieldIdxs;
778
779 std::unique_ptr<Internal::RNTupleJoinTable> fJoinTable;
780 bool fJoinTableIsBuilt = false;
781
782 std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fAuxiliaryFieldIdxs;
783
784 /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
785 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
786
787 /////////////////////////////////////////////////////////////////////////////
788 /// \brief Connect the provided field indices in the entry to their on-disk fields.
789 ///
790 /// \sa RNTupleProcessor::Connect()
791 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
793 bool updateFields = false) final;
794
795 /////////////////////////////////////////////////////////////////////////////
796 /// \brief Load the entry identified by the provided entry number of the primary processor.
797 ///
798 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
800
801 /////////////////////////////////////////////////////////////////////////////
802 /// \brief Get the total number of entries in this processor.
804
805 /////////////////////////////////////////////////////////////////////////////
806 /// \brief Set the validity for all fields in the auxiliary processor at once.
807 void SetAuxiliaryFieldValidity(bool validity);
808
809 /////////////////////////////////////////////////////////////////////////////
810 /// \brief Check if a field exists on-disk and can be read by the processor.
811 ///
812 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
813 bool CanReadFieldFromDisk(std::string_view fieldName) final
814 {
// The primary processor is asked first; the auxiliary processor is only consulted when the primary cannot read
// the field.
815 if (!fPrimaryProcessor->CanReadFieldFromDisk(fieldName)) {
// If the name starts with the auxiliary processor's name, drop that prefix plus one more character (presumably
// the '.' separator -- confirm) before asking the auxiliary processor.
// NOTE(review): the character following the prefix is not checked, and when `fieldName` equals the processor
// name exactly, substr() is called with a position greater than size(), which throws std::out_of_range.
// Confirm whether callers can pass such a name.
816 if (fieldName.find(fAuxiliaryProcessor->GetProcessorName()) == 0)
817 fieldName = fieldName.substr(fAuxiliaryProcessor->GetProcessorName().size() + 1);
818 return fAuxiliaryProcessor->CanReadFieldFromDisk(fieldName);
819 }
820
// The primary processor can read the field; the auxiliary processor is not consulted.
821 return true;
822 }
823
824 /////////////////////////////////////////////////////////////////////////////
825 /// \brief Add a field to the entry.
826 ///
827 /// \sa RNTupleProcessor::AddFieldToEntry()
829 const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
831
832 /////////////////////////////////////////////////////////////////////////////
833 /// \brief Add the entry mappings for this processor to the provided join table.
834 ///
835 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
836 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
837
838 /////////////////////////////////////////////////////////////////////////////
839 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
840 ///
841 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
842 void PrintStructureImpl(std::ostream &output) const final;
843
844 /////////////////////////////////////////////////////////////////////////////
845 /// \brief Construct a new RNTupleJoinProcessor.
846 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
847 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
848 /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
849 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
850 /// The join is made based on the combined join field values, and therefore each field has to be present in each
851 /// specified processor. If an empty list is provided, it is assumed that the processors are fully aligned.
852 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this
853 /// is the name of the primary processor.
855 std::unique_ptr<RNTupleProcessor> auxProcessor, const std::vector<std::string> &joinFields,
856 std::string_view processorName);
857
858public:
864};
865
866} // namespace Experimental
867} // namespace ROOT
868
869#endif // ROOT_RNTupleProcessor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples.
Collection of values in an RNTupleProcessor, analogous to REntry, with checks and support for missing...
void BindRawPtr(FieldIndex_t fieldIdx, void *valuePtr)
Bind a new value pointer to a field in the entry.
const ROOT::RFieldBase::RValue & GetValue(FieldIndex_t fieldIdx) const
bool IsValidField(FieldIndex_t fieldIdx) const
Check whether a field is valid for reading.
const std::string & FindFieldName(FieldIndex_t fieldIdx) const
Find the name of a field from its field index.
Processor specialization for vertically combined (chained) RNTupleProcessors.
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
void PrintStructureImpl(std::ostream &output) const final
Processor-specific implementation for printing its structure, called by PrintStructure().
void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0) final
Add the entry mappings for this processor to the provided join table.
void ConnectInnerProcessor(std::size_t processorNumber)
Update the entry to reflect any missing fields in the current inner processor.
Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr=nullptr, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance()) final
Add a field to the entry.
Internal::RNTupleProcessorProvenance fProvenance
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry=nullptr) final
Initialize the processor by creating an (initially empty) fEntry, or setting an existing one.
std::vector< ROOT::NTupleSize_t > fInnerNEntries
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided field indices in the entry to their on-disk fields.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in th...
std::vector< std::unique_ptr< RNTupleProcessor > > fInnerProcessors
Processor specialization for horizontally combined (joined) RNTupleProcessors.
std::set< Internal::RNTupleProcessorEntry::FieldIndex_t > fJoinFieldIdxs
std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > fAuxiliaryFieldIdxs
std::unique_ptr< RNTupleProcessor > fPrimaryProcessor
std::unique_ptr< Internal::RNTupleJoinTable > fJoinTable
std::unique_ptr< RNTupleProcessor > fAuxiliaryProcessor
Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
RNTupleOpenSpec(std::string_view n, const std::string &s)
std::variant< std::string, TDirectory * > fStorage
RNTupleOpenSpec(std::string_view n, TDirectory *s)
std::unique_ptr< ROOT::Internal::RPageSource > CreatePageSource() const
RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
void BindRawPtr(void *valuePtr)
Bind the value to valuePtr.
void * GetRawPtr() const
Get a non-owning pointer to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex
std::shared_ptr< void > GetPtr() const
Get the pointer to the field value managed by the processor's entry.
bool HasValue() const
Check if the pointer currently holds a valid value.
std::shared_ptr< T > GetPtr() const
Get a shared pointer to the field value managed by the processor's entry.
const T & operator*() const
Get a reference to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex
const T * operator->() const
Access the field value managed by the processor's entry.
void BindRawPtr(T *valuePtr)
Bind the value to valuePtr.
bool HasValue() const
Check if the pointer currently holds a valid value.
T * GetRawPtr() const
Get a non-owning pointer to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry * fProcessorEntry
RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
Identifies how a processor is composed.
Iterator over the entries of an RNTuple, or vertical concatenation thereof.
friend bool operator==(const iterator &lh, const iterator &rh)
friend bool operator!=(const iterator &lh, const iterator &rh)
RIterator(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber)
Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combine...
virtual bool CanReadFieldFromDisk(std::string_view fieldName)=0
Check if a field exists on-disk and can be read by the processor.
const std::string & GetProcessorName() const
Get the name of the processor.
RNTupleProcessorOptionalPtr< T > RequestField(const std::string &fieldName, void *valuePtr=nullptr)
Request access to a field for reading during processing.
virtual ROOT::NTupleSize_t GetNEntries()=0
Get the total number of entries in this processor.
static std::unique_ptr< RNTupleProcessor > CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple, const std::vector< std::string > &joinFields, std::string_view processorName="")
Create an RNTupleProcessor for a join (i.e., a horizontal combination) of RNTuples.
ROOT::NTupleSize_t fNEntries
Total number of entries.
RNTupleProcessorOptionalPtr< void > RequestField(const std::string &fieldName, const std::string &typeName, void *valuePtr=nullptr)
Request access to a field for reading during processing.
friend struct ROOT::Experimental::Internal::RNTupleProcessorEntryLoader
static std::unique_ptr< RNTupleProcessor > CreateChain(std::vector< RNTupleOpenSpec > ntuples, std::string_view processorName="")
Create an RNTupleProcessor for a chain (i.e., a vertical combination) of RNTuples.
RNTupleProcessor(RNTupleProcessor &&)=delete
std::shared_ptr< Internal::RNTupleProcessorEntry > fEntry
virtual void PrintStructureImpl(std::ostream &output) const =0
Processor-specific implementation for printing its structure, called by PrintStructure().
virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber)=0
Load the entry identified by the provided entry number.
ROOT::NTupleSize_t GetCurrentEntryNumber() const
Get the entry number that is currently being processed.
virtual void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance, bool updateFields)=0
Connect fields to the page source of the processor's underlying RNTuple(s).
std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > fFieldIdxs
virtual void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry)=0
Initialize the processor by creating an (initially empty) fEntry, or setting an existing one.
bool IsInitialized() const
Check if the processor already has been initialized.
virtual void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0)=0
Add the entry mappings for this processor to the provided join table.
std::size_t GetCurrentProcessorNumber() const
Get the number of the inner processor currently being read.
virtual Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr, const Internal::RNTupleProcessorProvenance &provenance)=0
Add a field to the entry.
void PrintStructure(std::ostream &output=std::cout)
Print a graphical representation of the processor composition.
ROOT::NTupleSize_t GetNEntriesProcessed() const
Get the total number of entries processed so far.
RNTupleProcessor(const RNTupleProcessor &)=delete
RNTupleProcessor & operator=(RNTupleProcessor &&)=delete
RNTupleProcessor(std::string_view processorName)
Create a new base RNTupleProcessor.
static std::unique_ptr< RNTupleProcessor > Create(RNTupleOpenSpec ntuple, std::string_view processorName="")
Create an RNTupleProcessor for a single RNTuple.
RNTupleProcessor & operator=(const RNTupleProcessor &)=delete
Processor specialization for processing a single RNTuple.
void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0) final
Add the entry mappings for this processor to the provided join table.
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided field indices in the entry to their on-disk fields.
void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry=nullptr) final
Initialize the processor by creating an (initially empty) fEntry, or setting an existing one.
void PrintStructureImpl(std::ostream &output) const final
Processor-specific implementation for printing its structure, called by PrintStructure().
std::unique_ptr< ROOT::Internal::RPageSource > fPageSource
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in th...
Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr=nullptr, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance()) final
Add a field to the entry.
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
std::unique_ptr< ROOT::RFieldBase > CreateAndConnectField(const std::string &qualifiedFieldName, const std::string &typeName)
Create a new field and connect it to the processor's page source.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Describe directory structure in memory.
Definition TDirectory.h:45
const Int_t n
Definition legend1.C:16
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert the std:: prefix.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.