85 namespace Experimental {
98 for (
auto &col : columns) {
105 std::istringstream lineStream(line);
110 for (
auto &col : columns) {
113 if (colType ==
"Long64_t") {
114 record.emplace_back(
new Long64_t(std::stoll(col)));
115 }
else if (colType ==
"double") {
116 record.emplace_back(
new double(std::stod(col)));
117 }
else if (colType ==
"bool") {
118 bool *
b =
new bool();
119 record.emplace_back(b);
120 std::istringstream is(col);
121 is >> std::boolalpha >> *
b;
123 record.emplace_back(
new std::string(col));
131 for (
size_t i = 0; i < size; ++i) {
132 fHeaders.push_back(
"Col" + std::to_string(i));
140 if ((colTypeName ==
"double" &&
typeid(
double) != ti) || (colTypeName ==
"Long64_t" &&
typeid(
Long64_t) != ti) ||
141 (colTypeName ==
"std::string" &&
typeid(std::string) != ti) || (colTypeName ==
"bool" &&
typeid(
bool) != ti)) {
142 std::string err =
"The type selected for column \"";
144 err +=
"\" does not correspond to column type, which is ";
146 throw std::runtime_error(err);
150 const auto index = std::distance(colNames.begin(), std::find(colNames.begin(), colNames.end(), colName));
151 std::vector<void *> ret(
fNSlots);
154 if (ti ==
typeid(
double)) {
156 }
else if (ti ==
typeid(
Long64_t)) {
158 }
else if (ti ==
typeid(std::string)) {
171 for (
auto &col : columns) {
189 type =
"std::string";
199 std::vector<std::string> columns;
201 for (
size_t i = 0; i < line.size(); ++i) {
210 std::stringstream val;
213 for (; i < line.size(); ++i) {
216 }
else if (line[i] ==
'"') {
218 if (line[i + 1] !=
'"') {
228 columns.emplace_back(val.str());
248 if (std::getline(stream, line)) {
251 std::string msg =
"Error reading headers of CSV file ";
253 throw std::runtime_error(msg);
257 if (std::getline(stream, line)) {
272 }
while (std::getline(stream, line));
281 for (
size_t i = 0; i < record.size(); ++i) {
285 if (colType ==
"Long64_t") {
287 }
else if (colType ==
"double") {
288 delete static_cast<double *
>(p);
289 }
else if (colType ==
"bool") {
290 delete static_cast<bool *
>(p);
292 delete static_cast<std::string *
>(p);
312 std::string msg =
"The dataset does not have column ";
314 throw std::runtime_error(msg);
329 auto dataPtr =
fRecords[entry][colIndex];
330 if (colType ==
"double") {
332 }
else if (colType ==
"Long64_t") {
334 }
else if (colType ==
"std::string") {
345 assert(0U ==
fNSlots &&
"Setting the number of slots even if the number of slots is different from zero.");
349 const auto nColumns =
fHeaders.size();
362 const auto nRecords =
fRecords.size();
363 const auto chunkSize = nRecords /
fNSlots;
std::vector< std::string > fHeaders
basic_string_view< char > string_view
Namespace for new ROOT classes and functions.
Regular expression class.
void SetNSlots(unsigned int nSlots)
Inform TDataSource of the number of processing slots (i.e. worker threads) used by the associated TDataFrame.
std::vector< std::deque< bool > > fBoolEvtValues
void InferType(const std::string &, unsigned int)
void SetEntry(unsigned int slot, ULong64_t entry)
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot...
std::vector< std::vector< std::string > > fStringEvtValues
static TRegexp doubleRegex2
bool HasColumn(std::string_view colName) const
Checks if the dataset has a certain column.
void FillHeaders(const std::string &)
void FillRecord(const std::string &, Record &)
std::vector< std::vector< Long64_t > > fLong64EvtValues
void Initialise()
Convenience method called before starting an event-loop.
std::vector< void * > Record
std::vector< std::vector< void * > > fColAddresses
void InferColTypes(std::vector< std::string > &)
std::vector< std::vector< double > > fDoubleEvtValues
TDataFrame MakeCsvDataFrame(std::string_view fileName, bool readHeaders=true, char delimiter=',')
Factory method to create a CSV TDataFrame.
std::vector< Record > fRecords
std::vector< void * > GetColumnReadersImpl(std::string_view, const std::type_info &)
type-erased vector of pointers to pointers to column values - one per slot
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
std::vector< std::string > ParseColumns(const std::string &)
size_t ParseValue(const std::string &, std::vector< std::string > &, size_t)
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges()
Return ranges of entries to distribute to tasks.
Ssiz_t Index(const TString &str, Ssiz_t *len, Ssiz_t start=0) const
Find the first occurrence of the regexp in string and return the position, or -1 if there is no match...
TCsvDS(std::string_view fileName, bool readHeaders=true, char delimiter=',')
Constructor to create a CSV TDataSource for TDataFrame.
void GenerateHeaders(size_t)
A pseudo container class which is a generator of indices.
unsigned long long ULong64_t
static RooMathCoreReg dummy
std::list< std::string > fColTypesList
static TRegexp doubleRegex1
typedef void((*Func_t)())
ROOT's TDataFrame offers a high level interface for analyses of data stored in TTrees.
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
std::map< std::string, std::string > fColTypes
std::string GetTypeName(std::string_view colName) const
Type of a column as a string, e.g. GetTypeName("x") == "double".
static TRegexp falseRegex
const std::vector< std::string > & GetColumnNames() const
Returns a reference to the collection of the dataset's column names.