Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RArrowDS.hxx
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
3 * All rights reserved. *
4 * *
5 * For the licensing terms see $ROOTSYS/LICENSE. *
6 * For the list of contributors see $ROOTSYS/README/CREDITS. *
7 *************************************************************************/
8
9#ifndef ROOT_RARROWTDS
10#define ROOT_RARROWTDS
11
12#include "ROOT/RDataFrame.hxx"
13#include "ROOT/RDataSource.hxx"
14
15#include <memory>
16
17namespace arrow {
18class Table; // Forward declaration of the Arrow table type; RArrowDS holds it through a std::shared_ptr (fTable).
19}
20
21namespace ROOT {
22namespace Internal {
23namespace RDF {
24class TValueGetter; // Forward declaration: helper class which keeps track for each slot where to get the entry (defined in RArrowDS.cxx).
25} // namespace RDF
26} // namespace Internal
27
28namespace RDF {
29
30class RArrowDS final : public RDataSource { // RDataFrame data source class to interface with Apache Arrow.
31private:
32 std::shared_ptr<arrow::Table> fTable;
33 std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges;
34 std::vector<std::string> fColumnNames;
35 size_t fNSlots = 0U;
36
37 std::vector<std::pair<size_t, size_t>> fGetterIndex; // (columnId, visitorId)
38 std::vector<std::unique_ptr<ROOT::Internal::RDF::TValueGetter>> fValueGetters; // Visitors to be used to track and get entries. One per column.
39 std::vector<void *> GetColumnReadersImpl(std::string_view name, const std::type_info &type) final; // Returns a pointer to the pointer each value getter will point to.
40
41public:
42 RArrowDS(std::shared_ptr<arrow::Table> table, std::vector<std::string> const &columns);
43 // Rule of five: all four copy/move operations are deleted, so the data source is neither copyable nor movable.
44 RArrowDS(const RArrowDS &) = delete;
45 RArrowDS &operator=(const RArrowDS &) = delete;
46 RArrowDS(RArrowDS &&) = delete;
47 RArrowDS &operator=(RArrowDS &&) = delete;
48 ~RArrowDS() final;
49
50 const std::vector<std::string> &GetColumnNames() const final; // Returns a reference to the collection of the dataset's column names.
51 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final; // Returns ranges of entries to distribute to tasks.
52 std::string GetTypeName(std::string_view colName) const final; // Type of a column as a string.
53 bool HasColumn(std::string_view colName) const final; // Checks if the dataset has a certain column.
54 bool SetEntry(unsigned int slot, ULong64_t entry) final; // Advances the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
55 void InitSlot(unsigned int slot, ULong64_t firstEntry) final; // Called at the start of the data processing associated to a slot.
56 void SetNSlots(unsigned int nSlots) final; // Informs the data source of the number of processing slots.
57 void Initialize() final; // Called before starting an event-loop.
58 std::string GetLabel() final; // Returns a string representation of the datasource type.
59};
60
61RDataFrame FromArrow(std::shared_ptr<arrow::Table> table, std::vector<std::string> const &columnNames); // Factory method to create an Apache Arrow RDataFrame.
62
63} // namespace RDF
64
65} // namespace ROOT
66
67#endif
unsigned long long ULong64_t
Definition RtypesCore.h:70
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
Helper class which keeps track for each slot where to get the entry.
Definition RArrowDS.cxx:204
RDataFrame data source class to interface with Apache Arrow.
Definition RArrowDS.hxx:30
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
Definition RArrowDS.cxx:469
RArrowDS(RArrowDS &&)=delete
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
Definition RArrowDS.cxx:554
RArrowDS(const RArrowDS &)=delete
RArrowDS & operator=(const RArrowDS &)=delete
RArrowDS & operator=(RArrowDS &&)=delete
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
Definition RArrowDS.cxx:493
~RArrowDS() final
Destructor.
Definition RArrowDS.cxx:460
void InitSlot(unsigned int slot, ULong64_t firstEntry) final
Convenience method called at the start of the data processing associated to a slot.
Definition RArrowDS.cxx:511
std::string GetLabel() final
Return a string representation of the datasource type.
Definition RArrowDS.cxx:595
void Initialize() final
Convenience method called before starting an event-loop.
Definition RArrowDS.cxx:589
std::shared_ptr< arrow::Table > fTable
Definition RArrowDS.hxx:32
std::vector< std::pair< size_t, size_t > > fGetterIndex
Definition RArrowDS.hxx:37
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g. GetTypeName("x") == "double"; required for jitting, e.g. df.Filter("x>0").
Definition RArrowDS.cxx:475
std::vector< void * > GetColumnReadersImpl(std::string_view name, const std::type_info &type) final
This needs to return a pointer to the pointer each value getter will point to.
Definition RArrowDS.cxx:570
std::vector< std::unique_ptr< ROOT::Internal::RDF::TValueGetter > > fValueGetters
Definition RArrowDS.hxx:38
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition RArrowDS.cxx:502
std::vector< std::string > fColumnNames
Definition RArrowDS.hxx:34
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition RArrowDS.hxx:33
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
Definition RArrowDS.cxx:464
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
RDataFrame FromArrow(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columnNames)
Factory method to create an Apache Arrow RDataFrame.
Definition RArrowDS.cxx:606
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...