Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterfaceBase.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud CERN 08/2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include <ROOT/InternalTreeUtils.hxx> // GetFriendInfo, GetFileNamesFromTree
14#include <ROOT/RDF/Utils.hxx>
16#include <ROOT/RStringView.hxx>
17#include <TTree.h>
18
19#include <algorithm> // std::for_each
20#include <iomanip> // std::setw
21#include <memory>
22#include <set>
23#include <sstream>
24#include <string>
25#include <unordered_set>
26
28{
29 // TTree/TChain as input
30 const auto tree = fLoopManager->GetTree();
31
32 if (tree) {
34 return files.size();
35 }
36 return 0;
37}
38
40{
 // Builds a human-readable summary of the input this dataframe reads from and returns it
 // as a std::string. Three cases are tried in order: a TTree/TChain obtained from
 // fLoopManager, a data source (fDataSource), and finally an empty dataframe.
 // NOTE(review): the signature (listing line 39) is not present in this extract.
41 // TTree/TChain as input
42 const auto tree = fLoopManager->GetTree();
43 if (tree) {
44 const auto treeName = tree->GetName();
45 const auto isTChain = dynamic_cast<TChain *>(tree) ? true : false;
46 const auto treeType = isTChain ? "TChain" : "TTree";
 // A plain TTree with no current file is reported as "(in-memory)".
47 const auto isInMemory = !isTChain && !tree->GetCurrentFile() ? true : false;
48 const auto friendInfo = ROOT::Internal::TreeUtils::GetFriendInfo(*tree);
49 const auto hasFriends = friendInfo.fFriendNames.empty() ? false : true;
50 std::stringstream ss;
51 ss << "Dataframe from " << treeType << " " << treeName;
52 if (isInMemory) {
53 ss << " (in-memory)";
54 } else {
 // NOTE(review): listing line 55, which must declare `files`, is absent from this extract.
56 const auto numFiles = files.size();
57 if (numFiles == 1) {
58 ss << " in file " << files[0];
59 } else {
60 ss << " in files\n";
 // One file per line; the newline is omitted after the last entry.
61 for (auto i = 0u; i < numFiles; i++) {
62 ss << " " << files[i];
63 if (i < numFiles - 1)
64 ss << '\n';
65 }
66 }
67 }
68 if (hasFriends) {
69 const auto numFriends = friendInfo.fFriendNames.size();
70 if (numFriends == 1) {
71 ss << "\nwith friend\n";
72 } else {
73 ss << "\nwith friends\n";
74 }
75 for (auto i = 0u; i < numFriends; i++) {
 // The three fFriend* containers are indexed in parallel by friend position i.
 // `files` and `numFiles` below are fresh locals scoped to this loop iteration.
76 const auto nameAlias = friendInfo.fFriendNames[i];
77 const auto files = friendInfo.fFriendFileNames[i];
78 const auto numFiles = files.size();
79 const auto subnames = friendInfo.fFriendChainSubNames[i];
80 ss << " " << nameAlias.first;
 // The second element of the pair is printed in parentheses only when it differs from the first.
81 if (nameAlias.first != nameAlias.second)
82 ss << " (" << nameAlias.second << ")";
83 // case: TTree as friend
84 if (numFiles == 1) {
85 ss << " " << files[0];
86 }
87 // case: TChain as friend
 // Any file count other than exactly 1 (including 0) takes this branch.
88 else {
89 ss << '\n';
90 for (auto j = 0u; j < numFiles; j++) {
91 ss << " " << subnames[j] << " " << files[j];
92 if (j < numFiles - 1)
93 ss << '\n';
94 }
95 }
96 if (i < numFriends - 1)
97 ss << '\n';
98 }
99 }
100 return ss.str();
101 }
102 // Datasource as input
103 else if (fDataSource) {
104 const auto datasourceLabel = fDataSource->GetLabel();
105 return "Dataframe from datasource " + datasourceLabel;
106 }
107 // Trivial/empty datasource
108 else {
109 const auto n = fLoopManager->GetNEmptyEntries();
 // Singular/plural wording depends on whether exactly one row is filled.
110 if (n == 1) {
111 return "Empty dataframe filling 1 row";
112 } else {
113 return "Empty dataframe filling " + std::to_string(n) + " rows";
114 }
115 }
116}
117
// Constructs the interface from a shared RLoopManager: keeps a raw pointer to it, caches the
// data source it reports, and hands the shared_ptr itself over to fColRegister.
// NOTE(review): `lm` is read by the first two initializers and moved by the third. Members are
// initialized in declaration order, so this presumably relies on fColRegister being declared
// after fLoopManager and fDataSource — confirm against the class definition.
// NOTE(review): listing line 121 (the constructor body) is absent from this extract.
118ROOT::RDF::RInterfaceBase::RInterfaceBase(std::shared_ptr<RDFDetail::RLoopManager> lm)
119 : fLoopManager(lm.get()), fDataSource(lm->GetDataSource()), fColRegister(std::move(lm))
120{
122}
123
125 : fLoopManager(&lm), fDataSource(lm.GetDataSource()), fColRegister(colRegister)
126{
127}
128
129/////////////////////////////////////////////////////////////////////////////
130/// \brief Returns the names of the available columns.
131/// \return the container of column names.
132///
133/// This is not an action nor a transformation, just a query to the RDataFrame object.
134///
135/// ### Example usage:
136/// ~~~{.cpp}
137/// auto colNames = d.GetColumnNames();
138/// // Print columns' names
139/// for (auto &&colName : colNames) std::cout << colName << std::endl;
140/// ~~~
141///
143{
 // Gathers column names from three places (the column register, the input tree's branches,
 // the data source), de-duplicates them through an unordered_set, and returns them sorted.
 // NOTE(review): the signature (listing line 142) is not present in this extract.
144 // there could be duplicates between Redefined columns and columns in the data source
145 std::unordered_set<std::string> allColumns;
146
 // Only names for which RDFInternal::IsInternalColumn returns false are kept.
147 auto addIfNotInternal = [&allColumns](std::string_view colName) {
148 if (!RDFInternal::IsInternalColumn(colName))
149 allColumns.emplace(colName);
150 };
151
152 auto definedColumns = fColRegister.GetNames();
153
154 std::for_each(definedColumns.begin(), definedColumns.end(), addIfNotInternal);
155
156 auto tree = fLoopManager->GetTree();
157 if (tree) {
158 for (const auto &bName : RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false))
159 allColumns.emplace(bName);
160 }
161
162 if (fDataSource) {
163 for (const auto &s : fDataSource->GetColumnNames()) {
 // rfind(prefix, 0) != 0 is a "does not start with" test: names beginning with
 // "R_rdf_sizeof" are left out of the result.
164 if (s.rfind("R_rdf_sizeof", 0) != 0)
165 allColumns.emplace(s);
166 }
167 }
168
 // The unordered_set has no defined iteration order, so sort for a stable result.
169 ColumnNames_t ret(allColumns.begin(), allColumns.end());
170 std::sort(ret.begin(), ret.end());
171 return ret;
172}
173
174/////////////////////////////////////////////////////////////////////////////
175/// \brief Return the type of a given column as a string.
176/// \return the type of the required column.
177///
178/// This is not an action nor a transformation, just a query to the RDataFrame object.
179///
180/// ### Example usage:
181/// ~~~{.cpp}
182/// auto colType = d.GetColumnType("columnName");
183/// // Print column type
184/// std::cout << "Column columnName has type " << colType << std::endl;
185/// ~~~
186///
187std::string ROOT::RDF::RInterfaceBase::GetColumnType(std::string_view column)
188{
 // Resolve a possible alias first, so the lookups below use the resolved column name.
189 const auto col = fColRegister.ResolveAlias(std::string(column));
190
 // NOTE(review): presumably null when `col` is not a Define'd column — confirm against
 // the column register's GetDefine.
191 RDFDetail::RDefineBase *define = fColRegister.GetDefine(col);
192
 // The type name is computed by ColumnName2ColumnTypeName from the tree, the data source
 // and the define; the last argument is that helper's vector-to-RVec switch, always on here.
193 const bool convertVector2RVec = true;
194 return RDFInternal::ColumnName2ColumnTypeName(col, fLoopManager->GetTree(), fLoopManager->GetDataSource(), define,
195 convertVector2RVec);
196}
197
198/////////////////////////////////////////////////////////////////////////////
199/// \brief Return information about the dataframe.
200/// \return information about the dataframe as RDFDescription object
201///
202/// This convenience function describes the dataframe and combines the following information:
203/// - Number of event loops run, see GetNRuns()
204/// - Number of total and defined columns, see GetColumnNames() and GetDefinedColumnNames()
205/// - Column names, see GetColumnNames()
206/// - Column types, see GetColumnType()
207/// - Number of processing slots, see GetNSlots()
208///
209/// This is not an action nor a transformation, just a query to the RDataFrame object.
210/// The result is dependent on the node from which this method is called, e.g. the list of
211/// defined columns returned by GetDefinedColumnNames().
212///
213/// Please note that this is a convenience feature and the layout of the output can be subject
214/// to change and should be parsed via RDFDescription methods.
215///
216/// ### Example usage:
217/// ~~~{.cpp}
218/// RDataFrame df(10);
219/// auto df2 = df.Define("x", "1.f").Define("s", "\"myStr\"");
220/// // Describe the dataframe
221/// df2.Describe().Print();
222/// df2.Describe().Print(/*shortFormat=*/true);
223/// std::cout << df2.Describe().AsString() << std::endl;
224/// std::cout << df2.Describe().AsString(/*shortFormat=*/true) << std::endl;
225/// ~~~
226///
228{
 // Renders two text tables into one stringstream: a metadata table (property/value) and a
 // columns table (name/type/origin). The result is returned together with DescribeDataset().
 // NOTE(review): the signature (listing line 227) is not present in this extract.
229 // Build set of defined column names to find later in all column names
230 // the defined columns more efficiently
231 const auto columnNames = GetColumnNames();
232 std::set<std::string> definedColumnNamesSet;
233 for (const auto &name : GetDefinedColumnNames())
234 definedColumnNamesSet.insert(name);
235
236 // Get information for the metadata table
 // metadataProperties and metadataValues are parallel: entry i of one labels entry i of the other.
237 const std::vector<std::string> metadataProperties = {"Columns in total", "Columns from defines", "Event loops run",
238 "Processing slots"};
239 const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
240 std::to_string(definedColumnNamesSet.size()),
241 std::to_string(GetNRuns()), std::to_string(GetNSlots())};
242
243 // Set header for metadata table
244 const auto columnWidthProperties = RDFInternal::GetColumnWidth(metadataProperties);
245 // The column width of the values is required to make right-bound numbers and is equal
246 // to the maximum of the string "Value" and all values to be put in this column.
 // NOTE(review): std::max_element without a comparator orders std::string lexicographically,
 // so it selects the greatest string, not necessarily the longest one (e.g. "9" over "100000").
 // The width only matches the comment above when the greatest string is also the longest, or
 // when every value is at most 5 characters wide.
247 const auto columnWidthValues =
248 std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(), static_cast<std::size_t>(5u));
249 std::stringstream ss;
250 ss << std::left << std::setw(columnWidthProperties) << "Property" << std::setw(columnWidthValues) << "Value\n"
251 << std::setw(columnWidthProperties) << "--------" << std::setw(columnWidthValues) << "-----\n";
252
253 // Build metadata table
254 // All numbers should be bound to the right and strings bound to the left.
255 for (auto i = 0u; i < metadataProperties.size(); i++) {
256 ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
257 << std::setw(columnWidthValues) << metadataValues[i] << '\n';
258 }
259 ss << '\n'; // put space between this and the next table
260
261 // Set header for columns table
262 const auto columnWidthNames = RDFInternal::GetColumnWidth(columnNames);
263 const auto columnTypes = GetColumnTypeNamesList(columnNames);
264 const auto columnWidthTypes = RDFInternal::GetColumnWidth(columnTypes);
265 ss << std::left << std::setw(columnWidthNames) << "Column" << std::setw(columnWidthTypes) << "Type"
266 << "Origin\n"
267 << std::setw(columnWidthNames) << "------" << std::setw(columnWidthTypes) << "----"
268 << "------\n";
269
270 // Build columns table
271 const auto nCols = columnNames.size();
272 for (auto i = 0u; i < nCols; i++) {
 // A column is labelled "Define" when its name is in the defined-names set, "Dataset" otherwise.
273 auto origin = "Dataset";
274 if (definedColumnNamesSet.find(columnNames[i]) != definedColumnNamesSet.end())
275 origin = "Define";
276 ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes) << columnTypes[i]
277 << origin << '\n';
278 }
279 // Use the string returned from DescribeDataset() as the 'brief' description
280 // Use the converted to string stringstream ss as the 'full' description
281 return RDFDescription(DescribeDataset(), ss.str());
282}
283
284/// \brief Returns the names of the defined columns.
285/// \return the container of the defined column names.
286///
287/// This is not an action nor a transformation, just a simple utility to
288/// get the column names that have been defined up to the node.
289/// If no column has been defined, e.g. on a root node, it returns an
290/// empty collection.
291///
292/// ### Example usage:
293/// ~~~{.cpp}
294/// auto defColNames = d.GetDefinedColumnNames();
295/// // Print defined columns' names
296/// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
297/// ~~~
298///
300{
 // Copies the names produced by fColRegister.BuildDefineNames() into a ColumnNames_t.
 // NOTE(review): the signature (listing line 299) is not present in this extract.
301 ColumnNames_t definedColumns;
302
303 const auto columns = fColRegister.BuildDefineNames();
304 for (const auto &column : columns) {
 // NOTE(review): listing line 305 is absent from this extract, so the loop body shown
 // here may be incomplete.
306 definedColumns.emplace_back(column);
307 }
308
309 return definedColumns;
310}
311
312/// \brief Return a descriptor for the systematic variations registered in this branch of the computation graph.
313///
314/// This is not an action nor a transformation, just a simple utility to
315/// inspect the systematic variations that have been registered with Vary() up to this node.
316/// When called on the root node, it returns an empty descriptor.
317///
318/// ### Example usage:
319/// ~~~{.cpp}
320/// auto variations = d.GetVariations();
321/// variations.Print();
322/// ~~~
323///
325{
 // Pure delegation: the descriptor is built by fColRegister.
326 return fColRegister.BuildVariationsDescription();
327}
328
329/// \brief Checks if a column is present in the dataset.
330/// \return true if the column is available, false otherwise
331///
332/// This method checks if a column is part of the input ROOT dataset, has
333/// been defined or can be provided by the data source.
334///
335/// Example usage:
336/// ~~~{.cpp}
337/// ROOT::RDataFrame base(1);
338/// auto rdf = base.Define("definedColumn", [](){return 0;});
339/// rdf.HasColumn("definedColumn"); // true: we defined it
340/// rdf.HasColumn("rdfentry_"); // true: it's always there
341/// rdf.HasColumn("foo"); // false: it is not there
342/// ~~~
343bool ROOT::RDF::RInterfaceBase::HasColumn(std::string_view columnName)
344{
 // Three sources are consulted in order, returning true at the first hit:
 // the column register, the input tree's branch names, and the data source.
345 if (fColRegister.IsDefineOrAlias(columnName))
346 return true;
347
 // Branch names are only consulted when the loop manager actually has a tree.
348 if (fLoopManager->GetTree()) {
349 const auto &branchNames = fLoopManager->GetBranchNames();
350 const auto branchNamesEnd = branchNames.end();
 // Linear search over the branch names for an exact match.
351 if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
352 return true;
353 }
354
355 if (fDataSource && fDataSource->HasColumn(columnName))
356 return true;
357
358 return false;
359}
360
361/// \brief Gets the number of data processing slots.
362/// \return The number of data processing slots used by this RDataFrame instance
363///
364/// This method returns the number of data processing slots used by this RDataFrame
365/// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
366///
367/// Example usage:
368/// ~~~{.cpp}
369/// ROOT::EnableImplicitMT(6);
370/// ROOT::RDataFrame df(1);
371/// std::cout << df.GetNSlots() << std::endl; // prints "6"
372/// ~~~
374{
 // Pure delegation: the slot count is owned by the loop manager.
375 return fLoopManager->GetNSlots();
376}
377
378/// \brief Gets the number of event loops run.
379/// \return The number of event loops run by this RDataFrame instance
380///
381/// This method returns the number of event loops run so far by this RDataFrame instance.
382///
383/// Example usage:
384/// ~~~{.cpp}
385/// ROOT::RDataFrame df(1);
386/// std::cout << df.GetNRuns() << std::endl; // prints "0"
387/// df.Sum("rdfentry_").GetValue(); // trigger the event loop
388/// std::cout << df.GetNRuns() << std::endl; // prints "1"
389/// df.Sum("rdfentry_").GetValue(); // trigger another event loop
390/// std::cout << df.GetNRuns() << std::endl; // prints "2"
391/// ~~~
393{
 // Pure delegation: the run counter is owned by the loop manager.
394 return fLoopManager->GetNRuns();
395}
396
398{
 // Returns the type name of every column in columnList, in the same order, by calling
 // GetColumnType on each entry.
 // NOTE(review): the signature (listing line 397) is not present in this extract.
399 std::vector<std::string> types;
400
 // NOTE(review): `auto column` copies each element on every iteration; `const auto &`
 // would avoid the copy.
401 for (auto column : columnList) {
402 types.push_back(GetColumnType(column));
403 }
404 return types;
405}
406
// Throws std::runtime_error with a message that starts with callerName and states that the
// call was made with ImplicitMT enabled.
// NOTE(review): listing line 409 — the statement opening the block that is closed at listing
// line 413 — is absent from this extract, so the condition guarding the throw is not visible.
407void ROOT::RDF::RInterfaceBase::CheckIMTDisabled(std::string_view callerName)
408{
410 std::string error(callerName);
411 error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
412 throw std::runtime_error(error);
413 }
414}
415
417{
418 // Entry number column
419 const std::string entryColName = "rdfentry_";
420 const std::string entryColType = "ULong64_t";
421 auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
422 using NewColEntry_t = RDFDetail::RDefine<decltype(entryColGen), RDFDetail::ExtraArgsForDefine::SlotAndEntry>;
423
424 auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
425 ColumnNames_t{}, fColRegister, *fLoopManager);
426 fColRegister.AddDefine(std::move(entryColumn));
427
428 // Slot number column
429 const std::string slotColName = "rdfslot_";
430 const std::string slotColType = "unsigned int";
431 auto slotColGen = [](unsigned int slot) { return slot; };
432 using NewColSlot_t = RDFDetail::RDefine<decltype(slotColGen), RDFDetail::ExtraArgsForDefine::Slot>;
433
434 auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), ColumnNames_t{},
435 fColRegister, *fLoopManager);
436 fColRegister.AddDefine(std::move(slotColumn));
437
438 fColRegister.AddAlias("tdfentry_", entryColName);
439 fColRegister.AddAlias("tdfslot_", slotColName);
440}
unsigned long long ULong64_t
Definition RtypesCore.h:81
char name[80]
Definition TGX11.cxx:110
The head node of a RDF computation graph.
A binder for user-defined columns, variations and aliases.
A DFDescription contains useful information about a given RDataFrame computation graph.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
RDFDetail::RLoopManager * fLoopManager
< The RLoopManager at the root of this computation graph. Never null.
unsigned int GetNRuns() const
Gets the number of event loops run.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterfaceBase(std::shared_ptr< RDFDetail::RLoopManager > lm)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
std::string DescribeDataset() const
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
A descriptor for the systematic variations known to a given RDataFrame node.
A chain is a collection of files containing TTree objects.
Definition TChain.h:33
const Int_t n
Definition legend1.C:16
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
unsigned int GetNSlots()
Definition RDFUtils.cxx:283
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:222
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
Definition RDFUtils.cxx:372
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:363
ROOT::TreeUtils::RFriendInfo GetFriendInfo(const TTree &tree, bool retrieveEntries=false)
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
std::vector< std::string > ColumnNames_t
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:570
Definition tree.py:1