25#include <unordered_set> 
   32      const auto treeName = 
tree->GetName();
 
   33      const auto isTChain = 
dynamic_cast<TChain *
>(
tree) ? 
true : 
false;
 
   34      const auto treeType = isTChain ? 
"TChain" : 
"TTree";
 
   35      const auto isInMemory = !isTChain && !
tree->GetCurrentFile() ? true : 
false;
 
   37      const auto hasFriends = friendInfo.fFriendNames.empty() ? false : 
true;
 
   39      ss << 
"Dataframe from " << treeType << 
" " << treeName;
 
   44         const auto numFiles = files.size();
 
   46            ss << 
" in file " << files[0];
 
   49            for (
auto i = 0u; i < numFiles; i++) {
 
   50               ss << 
"  " << files[i];
 
   57         const auto numFriends = friendInfo.fFriendNames.size();
 
   58         if (numFriends == 1) {
 
   59            ss << 
"\nwith friend\n";
 
   61            ss << 
"\nwith friends\n";
 
   63         for (
auto i = 0u; i < numFriends; i++) {
 
   64            const auto nameAlias = friendInfo.fFriendNames[i];
 
   65            const auto files = friendInfo.fFriendFileNames[i];
 
   66            const auto numFiles = files.size();
 
   67            const auto subnames = friendInfo.fFriendChainSubNames[i];
 
   68            ss << 
"  " << nameAlias.first;
 
   69            if (nameAlias.first != nameAlias.second)
 
   70               ss << 
" (" << nameAlias.second << 
")";
 
   73               ss << 
" " << files[0];
 
   78               for (
auto j = 0u; j < numFiles; j++) {
 
   79                  ss << 
"    " << subnames[j] << 
" " << files[j];
 
   84            if (i < numFriends - 1)
 
   93      return "Dataframe from datasource " + datasourceLabel;
 
   99         return "Empty dataframe filling 1 row";
 
  101         return "Empty dataframe filling " + std::to_string(
n) + 
" rows";
 
  107   : fLoopManager(lm.get()), fDataSource(lm->GetDataSource()), fColRegister(std::move(lm))
 
  113   : fLoopManager(&lm), fDataSource(lm.GetDataSource()), fColRegister(colRegister)
 
  133   std::unordered_set<std::string> allColumns;
 
  135   auto addIfNotInternal = [&allColumns](std::string_view colName) {
 
  137         allColumns.emplace(colName);
 
  140   auto definedColumns = fColRegister.GetNames();
 
  142   std::for_each(definedColumns.begin(), definedColumns.end(), addIfNotInternal);
 
  144   auto tree = fLoopManager->GetTree();
 
  147         allColumns.emplace(bName);
 
  151      for (
const auto &s : fDataSource->GetColumnNames()) {
 
  152         if (s.rfind(
"R_rdf_sizeof", 0) != 0)
 
  153            allColumns.emplace(s);
 
  158   std::sort(ret.begin(), ret.end());
 
  177   const auto col = fColRegister.ResolveAlias(std::string(column));
 
  181   const bool convertVector2RVec = 
true;
 
  219   const auto columnNames = GetColumnNames();
 
  220   std::set<std::string> definedColumnNamesSet;
 
  221   for (
const auto &
name : GetDefinedColumnNames())
 
  222      definedColumnNamesSet.insert(
name);
 
  225   const std::vector<std::string> metadataProperties = {
"Columns in total", 
"Columns from defines", 
"Event loops run",
 
  227   const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
 
  228                                                    std::to_string(definedColumnNamesSet.size()),
 
  229                                                    std::to_string(GetNRuns()), std::to_string(
GetNSlots())};
 
  235   const auto columnWidthValues =
 
  236      std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(), 
static_cast<std::size_t
>(5u));
 
  237   std::stringstream ss;
 
  238   ss << std::left << std::setw(columnWidthProperties) << 
"Property" << std::setw(columnWidthValues) << 
"Value\n" 
  239      << std::setw(columnWidthProperties) << 
"--------" << std::setw(columnWidthValues) << 
"-----\n";
 
  243   for (
auto i = 0u; i < metadataProperties.size(); i++) {
 
  244      ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
 
  245         << std::setw(columnWidthValues) << metadataValues[i] << 
'\n';
 
  251   const auto columnTypes = GetColumnTypeNamesList(columnNames);
 
  253   ss << std::left << std::setw(columnWidthNames) << 
"Column" << std::setw(columnWidthTypes) << 
"Type" 
  255      << std::setw(columnWidthNames) << 
"------" << std::setw(columnWidthTypes) << 
"----" 
  259   const auto nCols = columnNames.size();
 
  260   for (
auto i = 0u; i < nCols; i++) {
 
  261      auto origin = 
"Dataset";
 
  262      if (definedColumnNamesSet.find(columnNames[i]) != definedColumnNamesSet.end())
 
  264      ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes) << columnTypes[i]
 
  293   const auto columns = fColRegister.BuildDefineNames();
 
  294   for (
const auto &column : columns) {
 
  296         definedColumns.emplace_back(column);
 
  299   return definedColumns;
 
  316   return fColRegister.BuildVariationsDescription();
 
  335   if (fColRegister.IsDefineOrAlias(columnName))
 
  338   if (fLoopManager->GetTree()) {
 
  339      const auto &branchNames = fLoopManager->GetBranchNames();
 
  340      const auto branchNamesEnd = branchNames.end();
 
  341      if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
 
  345   if (fDataSource && fDataSource->HasColumn(columnName))
 
  365   return fLoopManager->GetNSlots();
 
  384   return fLoopManager->GetNRuns();
 
  389   std::vector<std::string> types;
 
  391   for (
auto column : columnList) {
 
  392      types.push_back(GetColumnType(column));
 
  400      std::string error(callerName);
 
  401      error += 
" was called with ImplicitMT enabled, but multi-thread is not supported.";
 
  402      throw std::runtime_error(error);
 
  409   const std::string entryColName = 
"rdfentry_";
 
  410   const std::string entryColType = 
"ULong64_t";
 
  411   auto entryColGen = [](
unsigned int, 
ULong64_t entry) { 
return entry; };
 
  412   using NewColEntry_t = 
RDFDetail::RDefine<
decltype(entryColGen), RDFDetail::ExtraArgsForDefine::SlotAndEntry>;
 
  414   auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
 
  416   fColRegister.AddDefine(std::move(entryColumn));
 
  419   const std::string slotColName = 
"rdfslot_";
 
  420   const std::string slotColType = 
"unsigned int";
 
  421   auto slotColGen = [](
unsigned int slot) { 
return slot; };
 
  422   using NewColSlot_t = 
RDFDetail::RDefine<
decltype(slotColGen), RDFDetail::ExtraArgsForDefine::Slot>;
 
  424   auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), 
ColumnNames_t{},
 
  425                                                    fColRegister, *fLoopManager);
 
  426   fColRegister.AddDefine(std::move(slotColumn));
 
  428   fColRegister.AddAlias(
"tdfentry_", entryColName);
 
  429   fColRegister.AddAlias(
"tdfslot_", slotColName);
 
unsigned long long ULong64_t
 
The head node of a RDF computation graph.
 
ULong64_t GetNEmptyEntries() const
 
A binder for user-defined columns, variations and aliases.
 
A DFDescription contains useful information about a given RDataFrame computation graph.
 
virtual std::string GetLabel()
Return a string representation of the datasource type.
 
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
 
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
 
RDFDescription Describe()
Return information about the dataframe.
 
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
 
RDFDetail::RLoopManager * fLoopManager
< The RLoopManager at the root of this computation graph. Never null.
 
unsigned int GetNRuns() const
Gets the number of event loops run.
 
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
 
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
 
void CheckIMTDisabled(std::string_view callerName)
 
unsigned int GetNSlots() const
Gets the number of data processing slots.
 
RInterfaceBase(std::shared_ptr< RDFDetail::RLoopManager > lm)
 
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
 
std::string DescribeDataset() const
 
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
 
A descriptor for the systematic variations known to a given RDataFrame node.
 
A chain is a collection of files containing TTree objects.
 
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
 
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
 
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
 
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
 
ROOT::TreeUtils::RFriendInfo GetFriendInfo(const TTree &tree, bool retrieveEntries=false)
 
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
 
std::vector< std::string > ColumnNames_t
 
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.