Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleProcessor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleProcessor.hxx
2/// \ingroup NTuple
3/// \author Florine de Geus <florine.de.geus@cern.ch>
4/// \date 2024-03-26
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT_RNTupleProcessor
17#define ROOT_RNTupleProcessor
18
19#include <ROOT/REntry.hxx>
20#include <ROOT/RError.hxx>
23#include <ROOT/RNTupleModel.hxx>
24#include <ROOT/RNTupleTypes.hxx>
26#include <ROOT/RPageStorage.hxx>
27
28#include <memory>
29#include <string>
30#include <string_view>
31#include <vector>
32
33namespace ROOT {
34namespace Experimental {
35
36namespace Internal {
37struct RNTupleProcessorEntryLoader;
38} // namespace Internal
39
40// clang-format off
41/**
42\class ROOT::Experimental::RNTupleOpenSpec
43\ingroup NTuple
44\brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
45
46An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the
47TDirectory (or any of its subclasses) that contains the RNTuple.
48
49Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python.
50*/
51// clang-format on
53 friend class RNTupleProcessor;
56
57private:
58 std::string fNTupleName;
59 std::variant<std::string, TDirectory *> fStorage;
60
61public:
62 RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {}
63 RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {}
64
65 std::unique_ptr<ROOT::Internal::RPageSource> CreatePageSource() const;
66};
67
68// clang-format off
69/**
70\class ROOT::Experimental::RNTupleProcessorOptionalPtr<T>
71\ingroup NTuple
72\brief The RNTupleProcessorOptionalPtr provides access to values from fields present in an RNTupleProcessor, with support
73and checks for missing values.
74*/
75// clang-format on
76template <typename T>
78 friend class RNTupleProcessor;
79
80private:
83
89
90public:
91 /////////////////////////////////////////////////////////////////////////////
92 /// \brief Check if the pointer currently holds a valid value.
94
95 /////////////////////////////////////////////////////////////////////////////
96 /// \brief Get a shared pointer to the field value managed by the processor's entry.
97 ///
98 /// \return A `std::shared_ptr<T>` if the field is valid in the current entry, or a `nullptr` otherwise.
99 std::shared_ptr<T> GetPtr() const
100 {
103
104 return nullptr;
105 }
106
107 /////////////////////////////////////////////////////////////////////////////
108 /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
109 ///
110 /// \return A `T*` if the field is valid in the current entry, or a `nullptr` otherwise.
111 T *GetRawPtr() const { return GetPtr().get(); }
112
113 /////////////////////////////////////////////////////////////////////////////
114 /// \brief Bind the value to `valuePtr`.
115 ///
116 /// \param[in] valuePtr Pointer to bind the value to.
117 ///
118 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
119 /// example when a field is not present in one of the chained processors or when, during a join operation, no matching
120 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
121 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
122 /// its data through this interface, to ensure that only valid data can be read.
124
125 /////////////////////////////////////////////////////////////////////////////
126 /// \brief Get a reference to the field value managed by the processor's entry.
127 ///
128 /// Throws an exception if the field is invalid in the processor's current entry.
129 const T &operator*() const
130 {
131 if (auto ptr = GetPtr())
132 return *ptr;
133 else
134 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
135 "\" because it has no value for the current entry"));
136 }
137
138 /////////////////////////////////////////////////////////////////////////////
139 /// \brief Access the field value managed by the processor's entry.
140 ///
141 /// Throws an exception if the field is invalid in the processor's current entry.
142 const T *operator->() const
143 {
144 if (auto ptr = GetPtr())
145 return ptr.get();
146 else
147 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
148 "\" because it has no value for the current entry"));
149 }
150};
151
152// clang-format off
153/**
154\class ROOT::Experimental::RNTupleProcessorOptionalPtr<void>
155\ingroup NTuple
156\brief Specialization of RNTupleProcessorOptionalPtr<T> for `void`-type pointers.
157*/
158// clang-format on
159template <>
161 friend class RNTupleProcessor;
162
163private:
166
172
173public:
174 /////////////////////////////////////////////////////////////////////////////
175 /// \brief Check if the pointer currently holds a valid value.
177
178 /////////////////////////////////////////////////////////////////////////////
179 /// \brief Get the pointer to the field value managed by the processor's entry.
180 ///
181 /// \return A `std::shared_ptr<void>` if the field is valid in the current entry, or a `nullptr` otherwise.
182 std::shared_ptr<void> GetPtr() const
183 {
185 return fProcessorEntry->GetPtr<void>(fFieldIndex);
186
187 return nullptr;
188 }
189
190 /////////////////////////////////////////////////////////////////////////////
191 /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
192 ///
193 /// \return A `void*` if the field is valid in the current entry, or a `nullptr` otherwise.
194 void *GetRawPtr() const { return GetPtr().get(); }
195
196 /////////////////////////////////////////////////////////////////////////////
197 /// \brief Bind the value to `valuePtr`.
198 ///
199 /// \param[in] valuePtr Pointer to bind the value to.
200 ///
201 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
202 /// example when a field is not present in one of the chained processors or when, during a join operation, no matching
203 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
204 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
205 /// its data through this interface, to ensure that only valid data can be read.
207};
208
209// clang-format off
210/**
211\class ROOT::Experimental::RNTupleProcessor
212\ingroup NTuple
213\brief Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combined RNTuples.
214
215Example usage (see ntpl012_processor_chain.C and ntpl015_processor_join.C for bigger examples):
216
217~~~{.cpp}
218#include <ROOT/RNTupleProcessor.hxx>
219using ROOT::Experimental::RNTupleProcessor;
220using ROOT::Experimental::RNTupleOpenSpec;
221
222std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
223auto processor = RNTupleProcessor::CreateChain(ntuples);
224
225auto pt = processor->RequestField<float>("pt");
226
227for (const auto idx : *processor) {
228 std::cout << "event = " << idx << ", pt = " << *pt << std::endl;
229}
230~~~
231
232An RNTupleProcessor is created either:
2331. By providing one or more RNTupleOpenSpecs, each of which contains the name and storage location of a single RNTuple;
2342. By providing a previously created RNTupleProcessor.
235
236The RNTupleProcessor provides an iterator which gives access to the index of the current *global* entry of the
237processor, i.e. taking into account previously processed RNTuples.
238
239Because the schemas of the RNTuples that are part of an RNTupleProcessor may not necessarily be identical, or because
240it can occur that entries are only partially complete in a join-based processor, field values may be marked as
241"invalid", at which point their data should not be read. This is handled by the RNTupleProcessorOptionalPtr
242that is returned by RequestField().
243*/
244// clang-format on
250
251protected:
252 std::string fProcessorName;
253 std::unique_ptr<ROOT::RNTupleModel> fProtoModel = nullptr;
254 std::shared_ptr<Internal::RNTupleProcessorEntry> fEntry = nullptr;
255 std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fFieldIdxs;
256
257 /// Total number of entries. Only to be used internally by the processor, not meant to be exposed in the public
258 /// interface.
260
261 ROOT::NTupleSize_t fNEntriesProcessed = 0; //< Total number of entries processed so far
262 ROOT::NTupleSize_t fCurrentEntryNumber = 0; //< Current processor entry number
263 std::size_t fCurrentProcessorNumber = 0; //< Number of the currently open inner processor
264
265 /////////////////////////////////////////////////////////////////////////////
266 /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
267 /// an existing one.
268 virtual void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry) = 0;
269
270 /////////////////////////////////////////////////////////////////////////////
271 /// \brief Check if the processor already has been initialized.
272 bool IsInitialized() const { return fProtoModel && fEntry; }
273
274 /////////////////////////////////////////////////////////////////////////////
275 /// \brief Connect fields to the page source of the processor's underlying RNTuple(s).
276 ///
277 /// \param[in] fieldIdxs Indices of the fields to connect.
278 /// \param[in] provenance Provenance of the processor.
279 /// \param[in] updateFields Whether the fields in the entry need to be updated, because the current underlying
280 /// RNTuple source changed.
281 virtual void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
283
284 /////////////////////////////////////////////////////////////////////////////
285 /// \brief Load the entry identified by the provided entry number.
286 ///
287 /// \param[in] entryNumber Entry number to load
288 ///
289 /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise.
291
292 /////////////////////////////////////////////////////////////////////////////
293 /// \brief Get the proto model used by the processor.
294 ///
295 /// A processor's proto model contains all fields that can be accessed and is inferred from the descriptors of the
296 /// underlying RNTuples. It is used in RequestField() to check that the requested field is actually valid.
298 {
300 return *fProtoModel;
301 }
302
303 /////////////////////////////////////////////////////////////////////////////
304 /// \brief Get the total number of entries in this processor
306
307 /////////////////////////////////////////////////////////////////////////////
308 /// \brief Check if a field exists on-disk and can be read by the processor.
309 ///
310 /// \param[in] fieldName Name of the field to check.
311 virtual bool CanReadFieldFromDisk(std::string_view fieldName) = 0;
312
313 /////////////////////////////////////////////////////////////////////////////
314 /// \brief Add a field to the entry.
315 ///
316 ///
317 /// \param[in] fieldName Name of the field to add.
318 /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
319 /// created.
320 /// \param[in] provenance Provenance of the processor.
321 ///
322 /// \return The index of the newly added field in the entry.
323 ///
324 /// In case the field was already present in the entry, the index of the existing field is returned.
326 AddFieldToEntry(std::string_view fieldName, void *valuePtr,
328
329 /////////////////////////////////////////////////////////////////////////////
330 /// \brief Add the entry mappings for this processor to the provided join table.
331 ///
332 /// \param[in] joinTable the join table to map the entries to.
333 /// \param[in] entryOffset In case the entry mapping is added from a chain, the offset of the entry indexes to use
334 /// with respect to the processor's position in the chain.
336
337 /////////////////////////////////////////////////////////////////////////////
338 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
339 ///
340 /// \param[in,out] output Output stream to print to.
341 virtual void PrintStructureImpl(std::ostream &output) const = 0;
342
343 /////////////////////////////////////////////////////////////////////////////
344 /// \brief Create a new base RNTupleProcessor.
345 ///
346 /// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for
347 /// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary
348 /// RNTuple for RNTupleJoinProcessor.
350
351public:
356 virtual ~RNTupleProcessor() = default;
357
358 /////////////////////////////////////////////////////////////////////////////
359 /// \brief Get the total number of entries processed so far.
361
362 /////////////////////////////////////////////////////////////////////////////
363 /// \brief Get the entry number that is currently being processed.
365
366 /////////////////////////////////////////////////////////////////////////////
367 /// \brief Get the number of the inner processor currently being read.
368 ///
369 /// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned.
371
372 /////////////////////////////////////////////////////////////////////////////
373 /// \brief Get the name of the processor.
374 ///
375 /// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying
376 /// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the
377 /// primary processor for RNTupleJoinProcessor.
378 const std::string &GetProcessorName() const { return fProcessorName; }
379
380 /////////////////////////////////////////////////////////////////////////////
381 /// \brief Request access to a field for reading during processing.
382 ///
383 /// \tparam T Type of the requested field.
384 ///
385 /// \param[in] fieldName Name of the requested field.
386 ///
387 /// \return An RNTupleProcessorOptionalPtr, which provides access to the field's value.
388 ///
389 /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry during processing, for
390 /// example when a field is not present in one of the chained processors or when, during a join operation, no matching
391 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
392 /// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through
393 /// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read.
394 template <typename T>
396 {
398 // TODO handle alternative (compatible field types)
401 }
402
403 /////////////////////////////////////////////////////////////////////////////
404 /// \brief Print a graphical representation of the processor composition.
405 ///
406 /// \param[in,out] output Stream to print to (default is stdout).
407 ///
408 /// ### Example:
409 /// The structure of a processor representing a join between a single primary RNTuple and a chain of two auxiliary
410 /// RNTuples will be printed as follows:
411 /// ~~~
412 /// +-----------------------------+ +-----------------------------+
413 /// | ntuple | | ntuple_aux |
414 /// | ntuple.root | | ntuple_aux1.root |
415 /// +-----------------------------+ +-----------------------------+
416 /// +-----------------------------+
417 /// | ntuple_aux |
418 /// | ntuple_aux2.root |
419 /// +-----------------------------+
420 /// ~~~
421 void PrintStructure(std::ostream &output = std::cout) { PrintStructureImpl(output); }
422
423 // clang-format off
424 /**
425 \class ROOT::Experimental::RNTupleProcessor::RIterator
426 \ingroup NTuple
427 \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof.
428 */
429 // clang-format on
430 class RIterator {
431 private:
434
435 public:
436 using iterator_category = std::input_iterator_tag;
439 using difference_type = std::ptrdiff_t;
442
445 {
446 if (!fProcessor.fEntry) {
448 }
449 // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already
450 // know there is nothing to load.
453 /*updateFields=*/false);
455 }
456 }
457
463
465 {
466 auto obj = *this;
467 ++(*this);
468 return obj;
469 }
470
472
473 friend bool operator!=(const iterator &lh, const iterator &rh)
474 {
475 return lh.fCurrentEntryNumber != rh.fCurrentEntryNumber;
476 }
477 friend bool operator==(const iterator &lh, const iterator &rh)
478 {
479 return lh.fCurrentEntryNumber == rh.fCurrentEntryNumber;
480 }
481 };
482
483 RIterator begin() { return RIterator(*this, 0); }
485
486 /////////////////////////////////////////////////////////////////////////////
487 /// \brief Create an RNTupleProcessor for a single RNTuple.
488 ///
489 /// \param[in] ntuple The name and storage location of the RNTuple to process.
490 /// \param[in] processorName The name to give to the processor. If empty, the name of the input RNTuple is used.
491 ///
492 /// \return A pointer to the newly created RNTupleProcessor.
493 static std::unique_ptr<RNTupleProcessor> Create(RNTupleOpenSpec ntuple, std::string_view processorName = "");
494
495 /////////////////////////////////////////////////////////////////////////////
496 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
497 ///
498 /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process.
499 /// \param[in] processorName The name to give to the processor. If empty, the name of the first RNTuple is used.
500 ///
501 /// \return A pointer to the newly created RNTupleProcessor.
502 static std::unique_ptr<RNTupleProcessor>
503 CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::string_view processorName = "");
504
505 /////////////////////////////////////////////////////////////////////////////
506 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors.
507 ///
508 /// \param[in] innerProcessors A list with the processors to chain.
509 /// \param[in] processorName The name to give to the processor. If empty, the name of the first inner processor is
510 /// used.
511 ///
512 /// \return A pointer to the newly created RNTupleProcessor.
513 static std::unique_ptr<RNTupleProcessor>
514 CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors, std::string_view processorName = "");
515
516 /////////////////////////////////////////////////////////////////////////////
517 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
518 ///
519 /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential
520 /// order.
521 /// \param[in] auxNTuple The name and location of the RNTuple to join the primary RNTuple with. The order in which
522 /// its entries are processed is determined by the primary RNTuple and doesn't necessarily have to be sequential.
523 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
524 /// The join is made based on the combined join field values, and therefore each field has to be present in each
525 /// specified RNTuple. If an empty list is provided, it is assumed that the specified RNTuples are fully aligned.
526 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary RNTuple is used.
527 ///
528 /// \return A pointer to the newly created RNTupleProcessor.
529 static std::unique_ptr<RNTupleProcessor> CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple,
530 const std::vector<std::string> &joinFields,
531 std::string_view processorName = "");
532
533 /////////////////////////////////////////////////////////////////////////////
534 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
535 ///
536 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
537 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
538 /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
539 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
540 /// The join is made based on the combined join field values, and therefore each field has to be present in each
541 /// specified processor. If an empty list is provided, it is assumed that the specified processors are fully
542 /// aligned.
543 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary processor is used.
544 ///
545 /// \return A pointer to the newly created RNTupleProcessor.
546 static std::unique_ptr<RNTupleProcessor>
547 CreateJoin(std::unique_ptr<RNTupleProcessor> primaryProcessor, std::unique_ptr<RNTupleProcessor> auxProcessor,
548 const std::vector<std::string> &joinFields, std::string_view processorName = "");
549};
550
551// clang-format off
552/**
553\class ROOT::Experimental::RNTupleSingleProcessor
554\ingroup NTuple
555\brief Processor specialization for processing a single RNTuple.
556*/
557// clang-format on
559 friend class RNTupleProcessor;
560
561private:
563 std::unique_ptr<ROOT::Internal::RPageSource> fPageSource;
564
565 /////////////////////////////////////////////////////////////////////////////
566 /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
567 /// an existing one.
568 ///
569 /// At this point, the page source for the underlying RNTuple of the processor will be created and opened.
570 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
571
572 /////////////////////////////////////////////////////////////////////////////
573 /// \brief Connect the provided field indices in the entry to their on-disk fields.
574 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
576 bool updateFields = false) final;
577
578 /////////////////////////////////////////////////////////////////////////////
579 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
580 /// processor).
581 ///
582 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
584
585 /////////////////////////////////////////////////////////////////////////////
586 /// \brief Get the total number of entries in this processor.
594
595 /////////////////////////////////////////////////////////////////////////////
596 /// \brief Check if a field exists on-disk and can be read by the processor.
597 ///
598 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
599 bool CanReadFieldFromDisk(std::string_view fieldName) final;
600
601 /////////////////////////////////////////////////////////////////////////////
602 /// \brief Add a field to the entry.
603 ///
604 /// \sa RNTupleProcessor::AddFieldToEntry()
606 std::string_view fieldName, void *valuePtr = nullptr,
608
609 /////////////////////////////////////////////////////////////////////////////
610 /// \brief Add the entry mappings for this processor to the provided join table.
611 ///
612 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
613 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
614
615 /////////////////////////////////////////////////////////////////////////////
616 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
617 ///
618 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
619 void PrintStructureImpl(std::ostream &output) const final;
620
621 /////////////////////////////////////////////////////////////////////////////
622 /// \brief Construct a new RNTupleProcessor for processing a single RNTuple.
623 ///
624 /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process.
625 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is
626 /// the name of the underlying RNTuple.
628
629public:
635 {
636 // The proto model needs to be deleted before fPageSource.
637 fProtoModel.release();
638 };
639};
640
641// clang-format off
642/**
643\class ROOT::Experimental::RNTupleChainProcessor
644\ingroup NTuple
645\brief Processor specialization for vertically combined (*chained*) RNTupleProcessors.
646*/
647// clang-format on
649 friend class RNTupleProcessor;
650
651private:
652 std::vector<std::unique_ptr<RNTupleProcessor>> fInnerProcessors;
653 std::vector<ROOT::NTupleSize_t> fInnerNEntries;
654
656
657 /////////////////////////////////////////////////////////////////////////////
658 /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
659 /// an existing one.
660 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
661
662 /////////////////////////////////////////////////////////////////////////////
663 /// \brief Connect the provided field indices in the entry to their on-disk fields.
664 ///
665 /// \sa RNTupleProcessor::Connect()
666 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
668 bool updateFields = false) final;
669
670 /////////////////////////////////////////////////////////////////////////////
671 /// \brief Update the entry to reflect any missing fields in the current inner processor.
672 void ConnectInnerProcessor(std::size_t processorNumber);
673
674 /////////////////////////////////////////////////////////////////////////////
675 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
676 /// processor).
677 ///
678 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
680
681 /////////////////////////////////////////////////////////////////////////////
682 /// \brief Get the total number of entries in this processor.
683 ///
684 /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly!
686
687 /////////////////////////////////////////////////////////////////////////////
688 /// \brief Check if a field exists on-disk and can be read by the processor.
689 ///
690 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
691 bool CanReadFieldFromDisk(std::string_view fieldName) final
692 {
693 return fInnerProcessors[fCurrentProcessorNumber]->CanReadFieldFromDisk(fieldName);
694 }
695
696 /////////////////////////////////////////////////////////////////////////////
697 /// \brief Add a field to the entry.
698 ///
699 /// \sa RNTupleProcessor::AddFieldToEntry()
701 std::string_view fieldName, void *valuePtr = nullptr,
703
704 /////////////////////////////////////////////////////////////////////////////
705 /// \brief Add the entry mappings for this processor to the provided join table.
706 ///
707 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
708 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
709
710 /////////////////////////////////////////////////////////////////////////////
711 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
712 ///
713 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
714 void PrintStructureImpl(std::ostream &output) const final;
715
716 /////////////////////////////////////////////////////////////////////////////
717 /// \brief Construct a new RNTupleChainProcessor.
718 ///
719 /// \param[in] ntuples The source specification (name and storage location) for each RNTuple to process.
720 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this
721 /// is the name of the first inner processor.
722 ///
723 /// RNTuples are processed in the order in which they are specified.
724 RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName);
725
726public:
732};
733
734// clang-format off
735/**
736\class ROOT::Experimental::RNTupleJoinProcessor
737\ingroup NTuple
738\brief Processor specialization for horizontally combined (*joined*) RNTupleProcessors.
739*/
740// clang-format on
742 friend class RNTupleProcessor;
743
744private:
745 std::unique_ptr<RNTupleProcessor> fPrimaryProcessor;
746 std::unique_ptr<RNTupleProcessor> fAuxiliaryProcessor;
747
748 std::vector<std::string> fJoinFieldNames;
749 std::set<Internal::RNTupleProcessorEntry::FieldIndex_t> fJoinFieldIdxs;
750
751 std::unique_ptr<Internal::RNTupleJoinTable> fJoinTable;
752 bool fJoinTableIsBuilt = false;
753
754 std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fAuxiliaryFieldIdxs;
755
756 /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
757 /// an existing one.
758 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
759
760 /////////////////////////////////////////////////////////////////////////////
761 /// \brief Connect the provided field indices in the entry to their on-disk fields.
762 ///
763 /// \sa RNTupleProcessor::Connect()
764 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
766 bool updateFields = false) final;
767
768 /////////////////////////////////////////////////////////////////////////////
769 /// \brief Load the entry identified by the provided entry number of the primary processor.
770 ///
771 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
773
774 /////////////////////////////////////////////////////////////////////////////
775 /// \brief Get the total number of entries in this processor.
777
778 /////////////////////////////////////////////////////////////////////////////
779 /// \brief Set the processor's proto model by combining the primary and auxiliary models.
780 ///
781 /// \param[in] primaryModel The proto model of the primary processor.
782 /// \param[in] auxModel The proto model of the auxiliary processor.
783 ///
784 /// To prevent field name clashes when one or more models have fields with duplicate names, fields from each
785 /// auxiliary model are stored as an anonymous record, and subsequently registered as subfields in the join model.
786 /// This way, they can be accessed from the processor's entry as `auxNTupleName.fieldName`.
787 void SetProtoModel(std::unique_ptr<ROOT::RNTupleModel> primaryModel, std::unique_ptr<ROOT::RNTupleModel> auxModel);
788
789 /////////////////////////////////////////////////////////////////////////////
790 /// \brief Set the validity for all fields in the auxiliary processor at once.
791 void SetAuxiliaryFieldValidity(bool validity);
792
793 /////////////////////////////////////////////////////////////////////////////
794 /// \brief Check if a field exists on-disk and can be read by the processor.
795 ///
796 /// \sa RNTupleProcessor::CanReadFieldFromDisk()
797 bool CanReadFieldFromDisk(std::string_view fieldName) final
798 {
799 if (!fPrimaryProcessor->CanReadFieldFromDisk(fieldName)) {
800 if (fieldName.find(fAuxiliaryProcessor->GetProcessorName()) == 0)
801 fieldName = fieldName.substr(fAuxiliaryProcessor->GetProcessorName().size() + 1);
802 return fAuxiliaryProcessor->CanReadFieldFromDisk(fieldName);
803 }
804
805 return true;
806 }
807
808 /////////////////////////////////////////////////////////////////////////////
809 /// \brief Add a field to the entry.
810 ///
811 /// \sa RNTupleProcessor::AddFieldToEntry()
813 std::string_view fieldName, void *valuePtr = nullptr,
815
816 /////////////////////////////////////////////////////////////////////////////
817 /// \brief Add the entry mappings for this processor to the provided join table.
818 ///
819 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
820 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
821
822 /////////////////////////////////////////////////////////////////////////////
823 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
824 ///
825 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
826 void PrintStructureImpl(std::ostream &output) const final;
827
828 /////////////////////////////////////////////////////////////////////////////
829 /// \brief Construct a new RNTupleJoinProcessor.
830 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
831 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
832 /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
833 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
834 /// The join is made based on the combined join field values, and therefore each field has to be present in each
835 /// specified processor. If an empty list is provided, it is assumed that the processors are fully aligned.
836 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this
837 /// is the name of the primary processor.
839 std::unique_ptr<RNTupleProcessor> auxProcessor, const std::vector<std::string> &joinFields,
840 std::string_view processorName);
841
842public:
848};
849
850} // namespace Experimental
851} // namespace ROOT
852
853#endif // ROOT_RNTupleProcessor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:300
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples.
Collection of values in an RNTupleProcessor, analogous to REntry, with checks and support for missing...
void BindRawPtr(FieldIndex_t fieldIdx, void *valuePtr)
Bind a new value pointer to a field in the entry.
bool IsValidField(FieldIndex_t fieldIdx) const
Check whether a field is valid for reading.
const std::string & FindFieldName(FieldIndex_t fieldIdx) const
Find the name of a field from its field index.
std::shared_ptr< T > GetPtr(FieldIndex_t fieldIdx) const
Get a pointer to the value for the field represented by the provided field index.
Processor specialization for vertically combined (chained) RNTupleProcessors.
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
void PrintStructureImpl(std::ostream &output) const final
Processor-specific implementation for printing its structure, called by PrintStructure().
void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0) final
Add the entry mappings for this processor to the provided join table.
void ConnectInnerProcessor(std::size_t processorNumber)
Update the entry to reflect any missing fields in the current inner processor.
Internal::RNTupleProcessorProvenance fProvenance
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry=nullptr) final
Initialize the processor, by setting fProtoModel and creating an (initially empty) fEntry,...
std::vector< ROOT::NTupleSize_t > fInnerNEntries
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided field indices in the entry to their on-disk fields.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in th...
ROOT::RResult< Internal::RNTupleProcessorEntry::FieldIndex_t > AddFieldToEntry(std::string_view fieldName, void *valuePtr=nullptr, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance()) final
Add a field to the entry.
std::vector< std::unique_ptr< RNTupleProcessor > > fInnerProcessors
Processor specialization for horizontally combined (joined) RNTupleProcessors.
std::set< Internal::RNTupleProcessorEntry::FieldIndex_t > fJoinFieldIdxs
std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > fAuxiliaryFieldIdxs
std::unique_ptr< RNTupleProcessor > fPrimaryProcessor
std::unique_ptr< Internal::RNTupleJoinTable > fJoinTable
std::unique_ptr< RNTupleProcessor > fAuxiliaryProcessor
Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
RNTupleOpenSpec(std::string_view n, const std::string &s)
std::variant< std::string, TDirectory * > fStorage
RNTupleOpenSpec(std::string_view n, TDirectory *s)
std::unique_ptr< ROOT::Internal::RPageSource > CreatePageSource() const
RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
void BindRawPtr(void *valuePtr)
Bind the value to valuePtr.
void * GetRawPtr() const
Get a non-owning pointer to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex
std::shared_ptr< void > GetPtr() const
Get the pointer to the field value managed by the processor's entry.
bool HasValue() const
Check if the pointer currently holds a valid value.
std::shared_ptr< T > GetPtr() const
Get a shared pointer to the field value managed by the processor's entry.
const T & operator*() const
Get a reference to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex
const T * operator->() const
Access the field value managed by the processor's entry.
void BindRawPtr(T *valuePtr)
Bind the value to valuePtr.
bool HasValue() const
Check if the pointer currently holds a valid value.
T * GetRawPtr() const
Get a non-owning pointer to the field value managed by the processor's entry.
Internal::RNTupleProcessorEntry * fProcessorEntry
RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
Identifies how a processor is composed.
Iterator over the entries of an RNTuple, or vertical concatenation thereof.
friend bool operator==(const iterator &lh, const iterator &rh)
friend bool operator!=(const iterator &lh, const iterator &rh)
RIterator(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber)
Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combine...
virtual bool CanReadFieldFromDisk(std::string_view fieldName)=0
Check if a field exists on-disk and can be read by the processor.
const std::string & GetProcessorName() const
Get the name of the processor.
virtual ROOT::NTupleSize_t GetNEntries()=0
Get the total number of entries in this processor.
static std::unique_ptr< RNTupleProcessor > CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple, const std::vector< std::string > &joinFields, std::string_view processorName="")
Create an RNTupleProcessor for a join (i.e., a horizontal combination) of RNTuples.
ROOT::NTupleSize_t fNEntries
Total number of entries.
friend struct ROOT::Experimental::Internal::RNTupleProcessorEntryLoader
static std::unique_ptr< RNTupleProcessor > CreateChain(std::vector< RNTupleOpenSpec > ntuples, std::string_view processorName="")
Create an RNTupleProcessor for a chain (i.e., a vertical combination) of RNTuples.
RNTupleProcessor(RNTupleProcessor &&)=delete
std::shared_ptr< Internal::RNTupleProcessorEntry > fEntry
virtual void PrintStructureImpl(std::ostream &output) const =0
Processor-specific implementation for printing its structure, called by PrintStructure().
virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber)=0
Load the entry identified by the provided entry number.
ROOT::NTupleSize_t GetCurrentEntryNumber() const
Get the entry number that is currently being processed.
virtual ROOT::RResult< Internal::RNTupleProcessorEntry::FieldIndex_t > AddFieldToEntry(std::string_view fieldName, void *valuePtr, const Internal::RNTupleProcessorProvenance &provenance)=0
Add a field to the entry.
virtual void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance, bool updateFields)=0
Connect fields to the page source of the processor's underlying RNTuple(s).
const ROOT::RNTupleModel & GetProtoModel() const
Get the proto model used by the processor.
std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > fFieldIdxs
RNTupleProcessorOptionalPtr< T > RequestField(std::string_view fieldName, void *valuePtr=nullptr)
Request access to a field for reading during processing.
virtual void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry)=0
Initialize the processor, by setting fProtoModel and creating an (initially empty) fEntry,...
bool IsInitialized() const
Check if the processor already has been initialized.
virtual void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0)=0
Add the entry mappings for this processor to the provided join table.
std::size_t GetCurrentProcessorNumber() const
Get the number of the inner processor currently being read.
void PrintStructure(std::ostream &output=std::cout)
Print a graphical representation of the processor composition.
ROOT::NTupleSize_t GetNEntriesProcessed() const
Get the total number of entries processed so far.
RNTupleProcessor(const RNTupleProcessor &)=delete
RNTupleProcessor & operator=(RNTupleProcessor &&)=delete
RNTupleProcessor(std::string_view processorName)
Create a new base RNTupleProcessor.
std::unique_ptr< ROOT::RNTupleModel > fProtoModel
static std::unique_ptr< RNTupleProcessor > Create(RNTupleOpenSpec ntuple, std::string_view processorName="")
Create an RNTupleProcessor for a single RNTuple.
RNTupleProcessor & operator=(const RNTupleProcessor &)=delete
Processor specialization for processing a single RNTuple.
void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset=0) final
Add the entry mappings for this processor to the provided join table.
ROOT::RResult< Internal::RNTupleProcessorEntry::FieldIndex_t > AddFieldToEntry(std::string_view fieldName, void *valuePtr=nullptr, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance()) final
Add a field to the entry.
void Connect(const std::unordered_set< Internal::RNTupleProcessorEntry::FieldIndex_t > &fieldIdxs, const Internal::RNTupleProcessorProvenance &provenance=Internal::RNTupleProcessorProvenance(), bool updateFields=false) final
Connect the provided field indices in the entry to their on-disk fields.
void Initialize(std::shared_ptr< Internal::RNTupleProcessorEntry > entry=nullptr) final
Initialize the processor, by setting fProtoModel and creating an (initially empty) fEntry,...
void PrintStructureImpl(std::ostream &output) const final
Processor-specific implementation for printing its structure, called by PrintStructure().
std::unique_ptr< ROOT::Internal::RPageSource > fPageSource
bool CanReadFieldFromDisk(std::string_view fieldName) final
Check if a field exists on-disk and can be read by the processor.
ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final
Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in th...
ROOT::NTupleSize_t GetNEntries() final
Get the total number of entries in this processor.
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
The RNTupleModel encapsulates the schema of an RNTuple.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:198
Describe directory structure in memory.
Definition TDirectory.h:45
const Int_t n
Definition legend1.C:16
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
static void output()