24 std::shared_ptr<arrow::Table> fTable;
30 std::vector<std::unique_ptr<ROOT::Internal::RDF::TValueGetter>> fValueGetters;
34 RArrowDS(std::shared_ptr<arrow::Table> table, std::vector<std::string> const &columns);
37 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() override;
42 void SetNSlots(unsigned int nSlots) override;
unsigned long long ULong64_t
RDataFrame data source class to interface with Apache Arrow.
bool HasColumn(std::string_view colName) const override
Checks if the dataset has a certain column.
RArrowDS(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columns)
Constructor to create an Arrow RDataSource for RDataFrame.
void Initialise() override
Convenience method called before starting an event-loop.
std::string GetLabel() override
Return a string representation of the datasource type.
void SetNSlots(unsigned int nSlots) override
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
const std::vector< std::string > & GetColumnNames() const override
Returns a reference to the collection of the dataset's column names.
void InitSlot(unsigned int slot, ULong64_t firstEntry) override
Convenience method called at the start of the data processing associated to a slot.
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() override
Return ranges of entries to distribute to tasks.
std::shared_ptr< arrow::Table > fTable
std::vector< std::pair< size_t, size_t > > fGetterIndex
std::vector< std::unique_ptr< ROOT::Internal::RDF::TValueGetter > > fValueGetters
std::vector< void * > GetColumnReadersImpl(std::string_view name, const std::type_info &type) override
This needs to return a pointer to the pointer each value getter will point to.
std::vector< std::string > fColumnNames
std::string GetTypeName(std::string_view colName) const override
Type of a column as a string, e.g. GetTypeName("x") == "double".
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
bool SetEntry(unsigned int slot, ULong64_t entry) override
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
ROOT's RDataFrame offers a high-level interface for analyses of data stored in TTrees, ...
basic_string_view< char > string_view
RDataFrame MakeArrowDataFrame(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columns)
Factory method to create an Apache Arrow RDataFrame.