20#include <unordered_map>
76 static const std::unordered_map<ColType_t, std::string>
fgColTypeMap;
86 std::unique_ptr<ROOT::Internal::RRawFile>
fCsvFile;
90 std::unordered_map<std::string, ColType_t>
fColTypes;
112 void InferType(
const std::string &,
unsigned int);
113 std::vector<std::string>
ParseColumns(
const std::string &);
114 size_t ParseValue(
const std::string &, std::vector<std::string> &,
size_t);
124 std::unordered_map<std::
string,
char> &&colTypes = {});
139 void SetNSlots(
unsigned int nSlots) final;
163 Long64_t linesChunkSize = -1LL, std::unordered_map<std::
string,
char> &&colTypes = {});
long long Long64_t
Portable signed long integer 8 bytes.
unsigned long long ULong64_t
Portable unsigned long integer 8 bytes.
Pure virtual base class for all column reader types.
RCsvDSColumnReader(void *valuePtr)
void * GetImpl(Long64_t) final
The RRawFile provides read-only access to local and remote files.
RDataFrame data source class for reading CSV files.
std::int64_t fDataLineNumber
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g. GetTypeName("x") == "double".
void FillRecord(const std::string &, Record_t &)
void Finalize() final
Convenience method called after concluding an event-loop.
std::size_t GetNFiles() const final
Returns the number of files from which the dataset is constructed.
ColType_t GetType(std::string_view colName) const
std::vector< std::vector< double > > fDoubleEvtValues
void InferType(const std::string &, unsigned int)
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
static const std::unordered_map< ColType_t, std::string > fgColTypeMap
size_t ParseValue(const std::string &, std::vector< std::string > &, size_t)
static const TRegexp fgTrueRegex
void GenerateHeaders(size_t)
std::vector< std::vector< void * > > fColAddresses
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int slot, std::string_view colName, const std::type_info &tid) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
std::string AsString() final
bool Readln(std::string &line)
std::vector< std::string > fHeaders
ULong64_t fEntryRangesRequested
std::int64_t fMaxLineNumber
RCsvDS & operator=(RCsvDS &&)=delete
ULong64_t fProcessedLines
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
void InferColTypes(std::vector< std::string > &)
std::unordered_map< std::string, ColType_t > fColTypes
std::vector< std::vector< Long64_t > > fLong64EvtValues
RCsvDS(std::string_view fileName, const ROptions &options)
Constructor to create a CSV RDataSource for RDataFrame.
static const TRegexp fgDoubleRegex2
std::vector< Record_t > fRecords
std::set< std::string > fColContainingEmpty
~RCsvDS() final
Destructor.
static const TRegexp fgFalseRegex
static const TRegexp fgDoubleRegex3
void ValidateColTypes(std::vector< std::string > &) const
static const TRegexp fgIntRegex
RCsvDS(const RCsvDS &)=delete
std::vector< std::string > ParseColumns(const std::string &)
void FillHeaders(const std::string &)
std::unique_ptr< ROOT::Internal::RRawFile > fCsvFile
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
std::string GetLabel() final
Return a string representation of the datasource type.
std::vector< void * > GetColumnReadersImpl(std::string_view, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
static const TRegexp fgDoubleRegex1
RCsvDS & operator=(const RCsvDS &)=delete
std::vector< std::vector< std::string > > fStringEvtValues
std::vector< std::deque< bool > > fBoolEvtValues
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
std::list< ColType_t > fColTypesList
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, CSV and other data formats, in C++ or Python.
Regular expression class.
Special implementation of ROOT::RRangeCast for TCollection, including a check that the cast target type inherits from TObject and a new constructor that takes the TCollection by pointer.
RDataFrame FromCSV(std::string_view fileName, const RCsvDS::ROptions &options)
Factory method to create a CSV RDataFrame.
Options that control how the CSV file is parsed.
bool fHeaders
The first line describes the columns.
bool fRightTrim
Trailing whitespace is removed.
std::int64_t fSkipFirstNLines
Ignore the first N lines of the file.
std::vector< std::string > fColumnNames
Impose column names.
std::int64_t fSkipLastNLines
Ignore the last N lines of the file.
std::unordered_map< std::string, char > fColumnTypes
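Specify custom column types; accepts an unordered map with keys being column names and values being type aliases ('O' for boolean, 'D' for double, 'L' for Long64_t, 'T' for std::string).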
bool fSkipBlankLines
Ignore empty lines (after trimming, if trimming is enabled).
char fDelimiter
Column delimiter character.
char fComment
Character indicating that the remainder of the line should be ignored, if different from '\0'.
bool fLeftTrim
Leading whitespace is removed.
std::int64_t fLinesChunkSize
Number of lines to read, -1 to read all.