29#include <unordered_map>
32namespace Experimental {
36class RNTupleDescriptor;
39class RNTupleColumnReader;
50 std::unique_ptr<ROOT::Experimental::Internal::RPageSource>
fSource;
72 std::vector<std::unique_ptr<ROOT::Experimental::RFieldBase>>
fProtoFields;
110 std::vector<RFieldInfo> fieldInfos);
118 explicit RNTupleDS(std::unique_ptr<ROOT::Experimental::Internal::RPageSource> pageSource);
121 RNTupleDS(std::string_view ntupleName, std::string_view fileName);
123 RNTupleDS(std::string_view ntupleName,
const std::vector<std::string> &fileNames);
126 void SetNSlots(
unsigned int nSlots)
final;
128 bool HasColumn(std::string_view colName)
const final;
129 std::string
GetTypeName(std::string_view colName)
const final;
130 std::vector<std::pair<ULong64_t, ULong64_t>>
GetEntryRanges() final;
131 std::
string GetLabel() final {
return "RNTupleDS"; }
141 GetColumnReaders(
unsigned int , std::string_view , const std::type_info &) final;
150namespace Experimental {
151RDataFrame FromRNTuple(std::string_view ntupleName, std::string_view fileName);
152RDataFrame FromRNTuple(std::string_view ntupleName,
const std::vector<std::string> &fileNames);
unsigned long long ULong64_t
Every RDF column is represented by exactly one RNTuple field.
The RDataSource implementation for RNTuple.
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots.
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
void Initialize() final
Convenience method called before starting an event-loop.
void AddField(const RNTupleDescriptor &desc, std::string_view colName, DescriptorId_t fieldId, std::vector< RFieldInfo > fieldInfos)
Provides the RDF column "colName" given the field identified by fieldId.
std::size_t fNextFileIndex
Index into fFileNames to the next file to process.
std::vector< std::string > fColumnNames
std::unordered_map< ULong64_t, std::size_t > fFirstEntry2RangeIdx
Maps the first entries from the ranges of the last GetEntryRanges() call to their corresponding index...
void Finalize() final
Convenience method called after concluding an event-loop.
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string.
void InitSlot(unsigned int slot, ULong64_t firstEntry) final
Convenience method called at the start of the data processing associated to a slot.
std::unordered_map< ROOT::Experimental::DescriptorId_t, std::string > fFieldId2QualifiedName
Connects the IDs of active proto fields and their subfields to their fully qualified name (a....
void FinalizeSlot(unsigned int slot) final
Convenience method called at the end of the data processing associated to a slot.
std::vector< std::vector< Internal::RNTupleColumnReader * > > fActiveColumnReaders
List of column readers returned by GetColumnReaders() organized by slot.
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
std::unique_ptr< Internal::RPageSource > fPrincipalSource
The first source is used to extract the schema and build the prototype fields.
std::vector< REntryRangeDS > fCurrentRanges
Basis for the ranges returned by the last GetEntryRanges() call.
std::unique_ptr< RNTupleDescriptor > fPrincipalDescriptor
A clone of the first page source's descriptor.
std::vector< REntryRangeDS > fNextRanges
Basis for the ranges populated by the PrepareNextRanges() call.
std::vector< std::unique_ptr< ROOT::Experimental::RFieldBase > > fProtoFields
We prepare a prototype field for every column.
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
void PrepareNextRanges()
Populates fNextRanges with the next set of entry ranges.
std::vector< std::string > fFileNames
std::string fNTupleName
The data source may be constructed with an ntuple name and a list of files.
std::vector< std::string > fColumnTypes
std::string GetLabel() final
Return a string representation of the datasource type.
ULong64_t fSeenEntries
The number of entries returned so far by GetEntryRanges().
The on-storage meta-data of an ntuple.
Representation of an RNTuple data set in a ROOT file.
Pure virtual base class for all column reader types.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The PrepareNextRanges() method populates the fNextRanges list with REntryRangeDS records.
std::unique_ptr< ROOT::Experimental::Internal::RPageSource > fSource
ULong64_t fLastEntry
End entry index in fSource, i.e. the number of entries in the range is fLastEntry - fFirstEntry.
ULong64_t fFirstEntry
First entry index in fSource.
Holds useful information about fields added to the RNTupleDS.
RFieldInfo(DescriptorId_t fieldId, std::size_t nRepetitions)
std::size_t fNRepetitions