25#include <unordered_set>
31 if (!
dynamic_cast<const TChain *
>(tree) && !tree->GetCurrentFile()) {
47 const auto tree = fLoopManager->GetTree();
49 const auto treeName = tree->GetName();
50 const auto isTChain =
dynamic_cast<TChain *
>(tree) ?
true :
false;
51 const auto treeType = isTChain ?
"TChain" :
"TTree";
52 const auto isInMemory = !isTChain && !tree->GetCurrentFile() ? true :
false;
54 const auto hasFriends = friendInfo.fFriendNames.empty() ? false :
true;
56 ss <<
"Dataframe from " << treeType;
58 ss <<
" " << treeName;
64 const auto numFiles = files.size();
66 ss <<
" in file " << files[0];
69 for (
auto i = 0u; i < numFiles; i++) {
70 ss <<
" " << files[i];
77 const auto numFriends = friendInfo.fFriendNames.size();
78 if (numFriends == 1) {
79 ss <<
"\nwith friend\n";
81 ss <<
"\nwith friends\n";
83 for (
auto i = 0u; i < numFriends; i++) {
84 const auto nameAlias = friendInfo.fFriendNames[i];
85 const auto files = friendInfo.fFriendFileNames[i];
86 const auto numFiles = files.size();
87 const auto subnames = friendInfo.fFriendChainSubNames[i];
88 ss <<
" " << nameAlias.first;
89 if (nameAlias.first != nameAlias.second)
90 ss <<
" (" << nameAlias.second <<
")";
93 ss <<
" " << files[0];
98 for (
auto j = 0u; j < numFiles; j++) {
99 ss <<
" " << subnames[j] <<
" " << files[j];
100 if (j < numFiles - 1)
104 if (i < numFriends - 1)
111 else if (fDataSource) {
112 const auto datasourceLabel = fDataSource->GetLabel();
113 return "Dataframe from datasource " + datasourceLabel;
117 const auto n = fLoopManager->GetNEmptyEntries();
119 return "Empty dataframe filling 1 row";
121 return "Empty dataframe filling " + std::to_string(
n) +
" rows";
127 : fLoopManager(lm), fDataSource(lm->GetDataSource()), fColRegister(lm.get())
134 fDataSource(lm.GetDataSource()),
135 fColRegister(colRegister)
155 std::unordered_set<std::string> allColumns;
157 auto addIfNotInternal = [&allColumns](std::string_view colName) {
159 allColumns.emplace(colName);
162 auto definedColumns = fColRegister.GenerateColumnNames();
164 std::for_each(definedColumns.begin(), definedColumns.end(), addIfNotInternal);
166 auto tree = fLoopManager->GetTree();
169 allColumns.emplace(bName);
173 for (
const auto &s : fDataSource->GetColumnNames()) {
174 if (s.rfind(
"R_rdf_sizeof", 0) != 0)
175 allColumns.emplace(s);
180 std::sort(ret.begin(), ret.end());
199 const auto col = fColRegister.ResolveAlias(column);
203 const bool convertVector2RVec =
true;
205 fLoopManager->GetDataSource(), define, convertVector2RVec);
241 const auto columnNames = GetColumnNames();
242 std::set<std::string> definedColumnNamesSet;
243 for (
const auto &
name : GetDefinedColumnNames())
244 definedColumnNamesSet.insert(
name);
247 const std::vector<std::string> metadataProperties = {
"Columns in total",
"Columns from defines",
"Event loops run",
249 const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
250 std::to_string(definedColumnNamesSet.size()),
251 std::to_string(GetNRuns()), std::to_string(GetNSlots())};
257 const auto columnWidthValues =
258 std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(),
static_cast<std::size_t
>(5u));
259 std::stringstream ss;
260 ss << std::left << std::setw(columnWidthProperties) <<
"Property" << std::setw(columnWidthValues) <<
"Value\n"
261 << std::setw(columnWidthProperties) <<
"--------" << std::setw(columnWidthValues) <<
"-----\n";
265 for (
auto i = 0u; i < metadataProperties.size(); i++) {
266 ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
267 << std::setw(columnWidthValues) << metadataValues[i] <<
'\n';
273 const auto columnTypes = GetColumnTypeNamesList(columnNames);
275 ss << std::left << std::setw(columnWidthNames) <<
"Column" << std::setw(columnWidthTypes) <<
"Type"
277 << std::setw(columnWidthNames) <<
"------" << std::setw(columnWidthTypes) <<
"----"
281 const auto nCols = columnNames.size();
282 for (
auto i = 0u; i < nCols; i++) {
283 auto origin =
"Dataset";
284 if (definedColumnNamesSet.find(columnNames[i]) != definedColumnNamesSet.end())
286 ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes) << columnTypes[i]
313 const auto columns = fColRegister.BuildDefineNames();
314 for (
const auto &column : columns) {
316 definedColumns.emplace_back(column);
319 return definedColumns;
336 return fColRegister.BuildVariationsDescription();
355 if (fColRegister.IsDefineOrAlias(columnName))
358 if (fLoopManager->GetTree()) {
359 const auto &branchNames = fLoopManager->GetBranchNames();
360 const auto branchNamesEnd = branchNames.end();
361 if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
365 if (fDataSource && fDataSource->HasColumn(columnName))
385 return fLoopManager->GetNSlots();
404 return fLoopManager->GetNRuns();
409 std::vector<std::string> types;
411 for (
auto column : columnList) {
412 types.push_back(GetColumnType(column));
420 std::string error(callerName);
421 error +=
" was called with ImplicitMT enabled, but multi-thread is not supported.";
422 throw std::runtime_error(error);
429 const std::string entryColName =
"rdfentry_";
430 const std::string entryColType =
"ULong64_t";
431 auto entryColGen = [](
unsigned int,
ULong64_t entry) {
return entry; };
432 using NewColEntry_t =
RDFDetail::RDefine<
decltype(entryColGen), RDFDetail::ExtraArgsForDefine::SlotAndEntry>;
434 auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
436 fColRegister.AddDefine(std::move(entryColumn));
439 const std::string slotColName =
"rdfslot_";
440 const std::string slotColType =
"unsigned int";
441 auto slotColGen = [](
unsigned int slot) {
return slot; };
442 using NewColSlot_t =
RDFDetail::RDefine<
decltype(slotColGen), RDFDetail::ExtraArgsForDefine::Slot>;
444 auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen),
ColumnNames_t{},
445 fColRegister, *fLoopManager);
446 fColRegister.AddDefine(std::move(slotColumn));
448 fColRegister.AddAlias(
"tdfentry_", entryColName);
449 fColRegister.AddAlias(
"tdfslot_", slotColName);
unsigned long long ULong64_t
The head node of an RDF computation graph.
A binder for user-defined columns, variations and aliases.
An RDFDescription contains useful information about a given RDataFrame computation graph.
virtual std::size_t GetNFiles() const
Returns the number of files from which the dataset is constructed.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > fLoopManager
The RLoopManager at the root of this computation graph. Never null.
unsigned int GetNRuns() const
Gets the number of event loops run.
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterfaceBase(std::shared_ptr< RDFDetail::RLoopManager > lm)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
std::string DescribeDataset() const
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
A descriptor for the systematic variations known to a given RDataFrame node.
A chain is a collection of files containing TTree objects.
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between columns.
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
ROOT::TreeUtils::RFriendInfo GetFriendInfo(const TTree &tree, bool retrieveEntries=false)
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
std::vector< std::string > ColumnNames_t
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow forward-declaring tbb::task_arena without forward-declaring tbb::interface7.
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.