20 #include <unordered_map>
66 static const std::unordered_map<ColType_t, std::string>
fgColTypeMap;
77 std::unique_ptr<ROOT::Internal::RRawFile>
fCsvFile;
81 std::unordered_map<std::string, ColType_t>
fColTypes;
103 void InferType(
const std::string &,
unsigned int);
104 std::vector<std::string>
ParseColumns(
const std::string &);
105 size_t ParseValue(
const std::string &, std::vector<std::string> &,
size_t);
114 RCsvDS(std::string_view fileName,
bool readHeaders = true,
char delimiter = ',',
Long64_t linesChunkSize = -1LL,
115 std::unordered_map<std::string, char> &&colTypes = {});
127 std::string GetTypeName(std::string_view colName) const final;
128 bool HasColumn(std::string_view colName) const final;
130 void SetNSlots(
unsigned int nSlots) final;
150 RDataFrame FromCSV(std::string_view fileName,
bool readHeaders = true,
char delimiter = ',',
151 Long64_t linesChunkSize = -1LL, std::unordered_map<std::string, char> &&colTypes = {});
unsigned long long ULong64_t
RDataFrame data source class for reading CSV files.
std::int64_t fDataLineNumber
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g. `GetTypeName("x") == "double"`; required for jitting expressions such as `df.Filter("x>0")`.
void FillRecord(const std::string &, Record_t &)
void Finalize() final
Convenience method called after concluding an event-loop.
std::size_t GetNFiles() const final
Returns the number of files from which the dataset is constructed.
ColType_t GetType(std::string_view colName) const
std::vector< std::vector< double > > fDoubleEvtValues
void InferType(const std::string &, unsigned int)
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
static const std::unordered_map< ColType_t, std::string > fgColTypeMap
size_t ParseValue(const std::string &, std::vector< std::string > &, size_t)
static const TRegexp fgTrueRegex
void GenerateHeaders(size_t)
std::vector< std::vector< void * > > fColAddresses
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
std::string AsString() final
bool Readln(std::string &line)
std::vector< std::string > fHeaders
ULong64_t fEntryRangesRequested
std::int64_t fMaxLineNumber
RCsvDS & operator=(RCsvDS &&)=delete
ULong64_t fProcessedLines
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
void InferColTypes(std::vector< std::string > &)
std::unordered_map< std::string, ColType_t > fColTypes
std::vector< std::vector< Long64_t > > fLong64EvtValues
static const TRegexp fgDoubleRegex2
std::vector< Record_t > fRecords
std::set< std::string > fColContainingEmpty
~RCsvDS() final
Destructor.
static const TRegexp fgFalseRegex
static const TRegexp fgDoubleRegex3
void ValidateColTypes(std::vector< std::string > &) const
static const TRegexp fgIntRegex
RCsvDS(const RCsvDS &)=delete
std::vector< std::string > ParseColumns(const std::string &)
void FillHeaders(const std::string &)
std::unique_ptr< ROOT::Internal::RRawFile > fCsvFile
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
std::string GetLabel() final
Return a string representation of the datasource type.
std::vector< void * > GetColumnReadersImpl(std::string_view, const std::type_info &) final
Type-erased vector of pointers to pointers to column values, one per slot.
static const TRegexp fgDoubleRegex1
RCsvDS & operator=(const RCsvDS &)=delete
std::vector< std::vector< std::string > > fStringEvtValues
std::vector< std::deque< bool > > fBoolEvtValues
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
std::list< ColType_t > fColTypesList
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, CSV and other data formats, in C++ or Python.
Regular expression class.
RDataFrame FromCSV(std::string_view fileName, const RCsvDS::ROptions &options)
Factory method to create a CSV RDataFrame.
tbb::task_arena is an alias of tbb::interface7::task_arena, which makes it impossible to forward-declare tbb::task_arena without also forward-declaring tbb::interface7.
Options that control how the CSV file is parsed.
bool fHeaders
The first line describes the columns.
bool fRightTrim
Trailing whitespace is removed.
std::int64_t fSkipFirstNLines
Ignore the first N lines of the file.
std::vector< std::string > fColumnNames
Impose column names.
std::int64_t fSkipLastNLines
Ignore the last N lines of the file.
std::unordered_map< std::string, char > fColumnTypes
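Specify custom column types; accepts an unordered map whose keys are column names and whose values are type aliases ('O' for boolean, 'D' for double, 'L' for Long64_t, 'T' for std::string).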
bool fSkipBlankLines
Ignore empty lines (after trimming, if trimming is enabled).
char fDelimiter
Column delimiter character.
char fComment
Character indicating that the remainder of the line should be ignored, if different from '\0'.
bool fLeftTrim
Leading whitespace is removed.
std::int64_t fLinesChunkSize
Number of lines to read, -1 to read all.