20 #include <unordered_map>
40 static const std::unordered_map<ColType_t, std::string>
fgColTypeMap;
48 std::unique_ptr<ROOT::Internal::RRawFile>
fCsvFile;
54 std::unordered_map<std::string, ColType_t>
fColTypes;
72 void InferType(
const std::string &,
unsigned int);
73 std::vector<std::string>
ParseColumns(
const std::string &);
74 size_t ParseValue(
const std::string &, std::vector<std::string> &,
size_t);
82 RCsvDS(std::string_view fileName,
bool readHeaders = true,
char delimiter = ',',
Long64_t linesChunkSize = -1LL,
83 std::unordered_map<std::string, char> &&colTypes = {});
88 std::string GetTypeName(std::string_view colName) const final;
89 bool HasColumn(std::string_view colName) const final;
91 void SetNSlots(
unsigned int nSlots) final;
105 RDataFrame FromCSV(std::string_view fileName,
bool readHeaders = true,
char delimiter = ',',
106 Long64_t linesChunkSize = -1LL, std::unordered_map<std::string, char> &&colTypes = {});
unsigned long long ULong64_t
RDataFrame data source class for reading CSV files.
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g. "double".
void FillRecord(const std::string &, Record_t &)
void Finalize() final
Convenience method called after concluding an event-loop.
ColType_t GetType(std::string_view colName) const
std::vector< std::vector< double > > fDoubleEvtValues
void InferType(const std::string &, unsigned int)
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
static const std::unordered_map< ColType_t, std::string > fgColTypeMap
size_t ParseValue(const std::string &, std::vector< std::string > &, size_t)
static const TRegexp fgTrueRegex
void GenerateHeaders(size_t)
std::vector< std::vector< void * > > fColAddresses
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
const Long64_t fLinesChunkSize
std::string AsString() final
std::vector< std::string > fHeaders
ULong64_t fEntryRangesRequested
ULong64_t fProcessedLines
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
void InferColTypes(std::vector< std::string > &)
std::unordered_map< std::string, ColType_t > fColTypes
std::vector< std::vector< Long64_t > > fLong64EvtValues
static const TRegexp fgDoubleRegex2
std::vector< Record_t > fRecords
std::set< std::string > fColContainingEmpty
static const TRegexp fgFalseRegex
static const TRegexp fgDoubleRegex3
void ValidateColTypes(std::vector< std::string > &) const
static const TRegexp fgIntRegex
std::vector< std::string > ParseColumns(const std::string &)
void FillHeaders(const std::string &)
std::unique_ptr< ROOT::Internal::RRawFile > fCsvFile
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
std::string GetLabel() final
Return a string representation of the datasource type.
std::vector< void * > GetColumnReadersImpl(std::string_view, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
static const TRegexp fgDoubleRegex1
std::vector< std::vector< std::string > > fStringEvtValues
std::vector< std::deque< bool > > fBoolEvtValues
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
std::list< ColType_t > fColTypesList
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, CSV and other data formats, in C++ or Python.
Regular expression class.
RDataFrame FromCSV(std::string_view fileName, bool readHeaders=true, char delimiter=',', Long64_t linesChunkSize=-1LL, std::unordered_map< std::string, char > &&colTypes={})
Factory method to create a CSV RDataFrame.
tbb::task_arena is an alias of tbb::interface7::task_arena, which does not allow forward-declaring tbb::task_arena without also forward-declaring tbb::interface7.