Logo ROOT   6.12/07
Reference Guide
TCsvDS.hxx
Go to the documentation of this file.
1 #ifndef ROOT_TCSVTDS
2 #define ROOT_TCSVTDS
3 
4 #include "ROOT/TDataFrame.hxx"
5 #include "ROOT/TDataSource.hxx"
6 
7 #include <deque>
8 #include <list>
9 #include <map>
10 #include <vector>
11 
12 #include <TRegexp.h>
13 
14 namespace ROOT {
15 namespace Experimental {
16 namespace TDF {
17 
19 
20 private:
21  using Record = std::vector<void *>; // one row as type-erased pointers, indexed by column (see fRecords[entry][column])
22 
23  unsigned int fNSlots = 0U; // presumably the number of processing slots passed to SetNSlots() - confirm in TCsvDS.cxx
24  std::string fFileName; // presumably the constructor's fileName argument - confirm in TCsvDS.cxx
25  char fDelimiter; // presumably the constructor's delimiter argument - confirm in TCsvDS.cxx
26  std::vector<std::string> fHeaders; // presumably the column names exposed by GetColumnNames() - confirm
27  std::map<std::string, std::string> fColTypes; // NOTE(review): looks like column name -> type name - confirm
28  std::list<std::string> fColTypesList;
29  std::vector<std::vector<void *>> fColAddresses; // fColAddresses[column][slot]
30  std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges; // presumably the ranges returned by GetEntryRanges() - confirm
31  std::vector<Record> fRecords; // fRecords[entry][column]
32  std::vector<std::vector<double>> fDoubleEvtValues ; // one per column per slot
33  std::vector<std::vector<Long64_t>> fLong64EvtValues ; // one per column per slot
34  std::vector<std::vector<std::string>> fStringEvtValues ; // one per column per slot
35  // A std::deque<bool> is used here instead of std::vector<bool>: with the vector<bool>
36  // specialisation a pointer to an individual boolean cannot be taken, so it would not work.
37  std::vector<std::deque<bool>> fBoolEvtValues ; // one per column per slot
38 
40 
41  void FillHeaders(const std::string &); // presumably fills fHeaders from the header line - confirm in TCsvDS.cxx
42  void FillRecord(const std::string &, Record &); // presumably parses one line of text into the given Record - confirm in TCsvDS.cxx
43  void GenerateHeaders(size_t); // NOTE(review): likely creates column names when the file has no header row - confirm
44  std::vector<void *> GetColumnReadersImpl(std::string_view, const std::type_info &); // type-erased vector of pointers to pointers to column values - one per slot
45  void InferColTypes(std::vector<std::string> &); // presumably infers the type of every column from a row of values - confirm
46  void InferType(const std::string &, unsigned int); // presumably infers the type of one column (by index) from one value - confirm
47  std::vector<std::string> ParseColumns(const std::string &); // presumably splits one line into its column values - confirm
48  size_t ParseValue(const std::string &, std::vector<std::string> &, size_t); // NOTE(review): looks like it parses one value starting at a position and returns the next position - confirm
49 
50 public:
51  TCsvDS(std::string_view fileName, bool readHeaders = true, char delimiter = ','); ///< Constructor to create a CSV TDataSource for TDataFrame.
52  ~TCsvDS();
53  const std::vector<std::string> &GetColumnNames() const; ///< Returns a reference to the collection of the dataset's column names.
54  std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges(); ///< Return ranges of entries to distribute to tasks.
55  std::string GetTypeName(std::string_view colName) const; ///< Type of a column as a string.
56  bool HasColumn(std::string_view colName) const; ///< Checks if the dataset has a certain column.
57  void SetEntry(unsigned int slot, ULong64_t entry); ///< Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
58  void SetNSlots(unsigned int nSlots); ///< Inform TDataSource of the number of processing slots.
59  void Initialise(); ///< Convenience method called before starting an event-loop.
60 };
61 
62 ////////////////////////////////////////////////////////////////////////////////////////////////
63 /// \brief Factory method to create a CSV TDataFrame.
64 /// \param[in] fileName Path of the CSV file.
65 /// \param[in] readHeaders `true` if the CSV file contains headers as first row, `false` otherwise
66 /// (default `true`).
67 /// \param[in] delimiter Delimiter character (default ',').
/// \return The newly created CSV TDataFrame.
68 TDataFrame MakeCsvDataFrame(std::string_view fileName, bool readHeaders = true, char delimiter = ',');
69 
70 } // ns TDF
71 } // ns Experimental
72 } // ns ROOT
73 
74 #endif
std::vector< std::string > fHeaders
Definition: TCsvDS.hxx:26
basic_string_view< char > string_view
Definition: RStringView.h:35
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
Regular expression class.
Definition: TRegexp.h:31
void SetNSlots(unsigned int nSlots)
Inform TDataSource of the number of processing slots (i.e. worker threads) used by the associated TDataFrame.
Definition: TCsvDS.cxx:343
std::vector< std::deque< bool > > fBoolEvtValues
Definition: TCsvDS.hxx:37
void InferType(const std::string &, unsigned int)
Definition: TCsvDS.cxx:177
void SetEntry(unsigned int slot, ULong64_t entry)
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot...
Definition: TCsvDS.cxx:325
std::vector< std::vector< std::string > > fStringEvtValues
Definition: TCsvDS.hxx:34
static TRegexp doubleRegex2
Definition: TCsvDS.hxx:39
bool HasColumn(std::string_view colName) const
Checks if the dataset has a certain column.
Definition: TCsvDS.cxx:320
void FillHeaders(const std::string &)
Definition: TCsvDS.cxx:95
void FillRecord(const std::string &, Record &)
Definition: TCsvDS.cxx:103
std::vector< std::vector< Long64_t > > fLong64EvtValues
Definition: TCsvDS.hxx:33
void Initialise()
Convenience method called before starting an event-loop.
Definition: TCsvDS.cxx:360
std::vector< void * > Record
Definition: TCsvDS.hxx:21
std::vector< std::vector< void * > > fColAddresses
Definition: TCsvDS.hxx:29
void InferColTypes(std::vector< std::string > &)
Definition: TCsvDS.cxx:168
TDataFrame data source class for reading CSV files.
Definition: TCsvDS.hxx:18
std::vector< std::vector< double > > fDoubleEvtValues
Definition: TCsvDS.hxx:32
TDataSource defines an API that TDataFrame can use to read arbitrary data formats.
Definition: TDataSource.hxx:51
TDataFrame MakeCsvDataFrame(std::string_view fileName, bool readHeaders=true, char delimiter=',')
Factory method to create a CSV TDataFrame.
Definition: TCsvDS.cxx:377
std::vector< Record > fRecords
Definition: TCsvDS.hxx:31
std::vector< void * > GetColumnReadersImpl(std::string_view, const std::type_info &)
type-erased vector of pointers to pointers to column values - one per slot
Definition: TCsvDS.cxx:136
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition: TCsvDS.hxx:30
std::vector< std::string > ParseColumns(const std::string &)
Definition: TCsvDS.cxx:197
size_t ParseValue(const std::string &, std::vector< std::string > &, size_t)
Definition: TCsvDS.cxx:208
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges()
Return ranges of entries to distribute to tasks.
Definition: TCsvDS.cxx:303
TCsvDS(std::string_view fileName, bool readHeaders=true, char delimiter=',')
Constructor to create a CSV TDataSource for TDataFrame.
Definition: TCsvDS.cxx:239
unsigned long long ULong64_t
Definition: RtypesCore.h:70
std::list< std::string > fColTypesList
Definition: TCsvDS.hxx:28
static TRegexp doubleRegex1
Definition: TCsvDS.hxx:39
ROOT's TDataFrame offers a high level interface for analyses of data stored in TTrees.
Definition: TDataFrame.hxx:39
std::map< std::string, std::string > fColTypes
Definition: TCsvDS.hxx:27
std::string GetTypeName(std::string_view colName) const
Type of a column as a string, e.g. `GetTypeName("x") == "double"`.
Definition: TCsvDS.cxx:309
const std::vector< std::string > & GetColumnNames() const
Returns a reference to the collection of the dataset's column names.
Definition: TCsvDS.cxx:298