25#include <unordered_set>
30 return dataSource->GetNFiles();
45 return "Empty dataframe filling 1 row";
47 return "Empty dataframe filling " + std::to_string(
n) +
" rows";
60 fColRegister(colRegister)
80 std::unordered_set<std::string> allColumns;
82 auto addIfNotInternal = [&allColumns](std::string_view colName) {
84 allColumns.emplace(colName);
87 auto definedColumns =
fColRegister.GenerateColumnNames();
89 std::for_each(definedColumns.begin(), definedColumns.end(), addIfNotInternal);
93 if (s.rfind(
"R_rdf_sizeof", 0) != 0)
94 allColumns.emplace(s);
99 std::sort(
ret.begin(),
ret.end());
119 std::sort(
ret.begin(),
ret.end());
142 RDFDetail::RDefineBase *define =
fColRegister.GetDefine(col);
144 const bool convertVector2RVec =
true;
183 std::set<std::string> definedColumnNamesSet;
185 definedColumnNamesSet.insert(
name);
188 const std::vector<std::string> metadataProperties = {
"Columns in total",
"Columns from defines",
"Event loops run",
190 const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
191 std::to_string(definedColumnNamesSet.size()),
198 const auto columnWidthValues =
199 std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(),
static_cast<std::size_t
>(5u));
200 std::stringstream ss;
201 ss << std::left << std::setw(columnWidthProperties) <<
"Property" << std::setw(columnWidthValues) <<
"Value\n"
202 << std::setw(columnWidthProperties) <<
"--------" << std::setw(columnWidthValues) <<
"-----\n";
206 for (
auto i = 0u; i < metadataProperties.size(); i++) {
207 ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
208 << std::setw(columnWidthValues) << metadataValues[i] <<
'\n';
216 ss << std::left << std::setw(columnWidthNames) <<
"Column" << std::setw(columnWidthTypes) <<
"Type"
218 << std::setw(columnWidthNames) <<
"------" << std::setw(columnWidthTypes) <<
"----"
222 const auto nCols = columnNames.size();
223 for (
auto i = 0u; i < nCols; i++) {
224 auto origin =
"Dataset";
225 if (definedColumnNamesSet.find(columnNames[i]) != definedColumnNamesSet.end())
227 ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes) << columnTypes[i]
255 for (
const auto &column : columns) {
257 definedColumns.emplace_back(column);
260 return definedColumns;
299 if (
auto ds =
GetDataSource(); ds && ds->HasColumn(columnName))
343 std::vector<std::string> types;
345 for (
auto column : columnList) {
354 std::string error(callerName);
355 error +=
" was called with ImplicitMT enabled, but multi-thread is not supported.";
356 throw std::runtime_error(error);
363 const std::string entryColName =
"rdfentry_";
364 const std::string entryColType =
"ULong64_t";
365 auto entryColGen = [](
unsigned int,
ULong64_t entry) {
return entry; };
366 using NewColEntry_t = RDFDetail::RDefine<
decltype(entryColGen), RDFDetail::ExtraArgsForDefine::SlotAndEntry>;
368 auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
373 const std::string slotColName =
"rdfslot_";
374 const std::string slotColType =
"unsigned int";
375 auto slotColGen = [](
unsigned int slot) {
return slot; };
376 using NewColSlot_t = RDFDetail::RDefine<
decltype(slotColGen), RDFDetail::ExtraArgsForDefine::Slot>;
378 auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen),
ColumnNames_t{},
unsigned long long ULong64_t
Portable unsigned long integer, 8 bytes.
The head node of a RDF computation graph.
A binder for user-defined columns, variations and aliases.
A DFDescription contains useful information about a given RDataFrame computation graph.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > fLoopManager
The RLoopManager at the root of this computation graph. Never null.
unsigned int GetNRuns() const
Gets the number of event loops run.
ColumnNames_t GetDatasetTopLevelFieldNames()
Retrieve the names of the top-level fields.
RDataSource * GetDataSource() const
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterfaceBase(std::shared_ptr< RDFDetail::RLoopManager > lm)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
std::string DescribeDataset() const
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
RDFDetail::RLoopManager RLoopManager
A descriptor for the systematic variations known to a given RDataFrame node.
Special implementation of ROOT::RRangeCast for TCollection, including a check that the cast target ty...
const std::vector< std::string > & GetTopLevelFieldNames(const ROOT::RDF::RDataSource &ds)
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between columns.
std::string DescribeDataset(ROOT::RDF::RDataSource &ds)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
const std::vector< std::string > & GetColumnNamesNoDuplicates(const ROOT::RDF::RDataSource &ds)
std::vector< std::string > ColumnNames_t
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.