Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterfaceBase.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud CERN 08/2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include <ROOT/InternalTreeUtils.hxx> // GetFriendInfo, GetFileNamesFromTree
14#include <ROOT/RDF/Utils.hxx>
16#include <string_view>
17#include <TTree.h>
18
19#include <algorithm> // std::for_each
20#include <iomanip> // std::setw
21#include <memory>
22#include <set>
23#include <sstream>
24#include <string>
25#include <unordered_set>
26
28{
29 if (auto dataSource = GetDataSource()) {
30 return dataSource->GetNFiles();
31 }
32 return 0;
33}
34
36{
37 // Datasource as input
38 if (auto ds = GetDataSource()) {
40 }
41 // Trivial/empty datasource
42 else {
43 const auto n = fLoopManager->GetNEmptyEntries();
44 if (n == 1) {
45 return "Empty dataframe filling 1 row";
46 } else {
47 return "Empty dataframe filling " + std::to_string(n) + " rows";
48 }
49 }
50}
51
52ROOT::RDF::RInterfaceBase::RInterfaceBase(std::shared_ptr<RDFDetail::RLoopManager> lm)
53 : fLoopManager(lm), fColRegister(lm.get())
54{
56}
57
59 : fLoopManager(std::shared_ptr<ROOT::Detail::RDF::RLoopManager>{&lm, [](ROOT::Detail::RDF::RLoopManager *) {}}),
60 fColRegister(colRegister)
61{
62}
63
64/////////////////////////////////////////////////////////////////////////////
65/// \brief Returns the names of the available columns.
66/// \return the container of column names.
67///
68/// This is not an action nor a transformation, just a query to the RDataFrame object.
69///
70/// ### Example usage:
71/// ~~~{.cpp}
72/// auto colNames = d.GetColumnNames();
73/// // Print columns' names
74/// for (auto &&colName : colNames) std::cout << colName << std::endl;
75/// ~~~
76///
78{
79 // there could be duplicates between Redefined columns and columns in the data source
80 std::unordered_set<std::string> allColumns;
81
82 auto addIfNotInternal = [&allColumns](std::string_view colName) {
84 allColumns.emplace(colName);
85 };
86
87 auto definedColumns = fColRegister.GenerateColumnNames();
88
90
91 if (auto ds = GetDataSource()) {
93 if (s.rfind("R_rdf_sizeof", 0) != 0)
94 allColumns.emplace(s);
95 }
96 }
97
99 std::sort(ret.begin(), ret.end());
100 return ret;
101}
102
/////////////////////////////////////////////////////////////////////////////
/// \brief Return the type of a given column as a string.
/// \param[in] column The name of the column; aliases are resolved before the lookup.
/// \return the type of the required column.
///
/// This is not an action nor a transformation, just a query to the RDataFrame object.
///
/// ### Example usage:
/// ~~~{.cpp}
/// auto colType = d.GetColumnType("columnName");
/// // Print column type
/// std::cout << "Column columnName has type " << colType << std::endl;
/// ~~~
///
116std::string ROOT::RDF::RInterfaceBase::GetColumnType(std::string_view column)
117{
118 const auto col = fColRegister.ResolveAlias(column);
119
120 RDFDetail::RDefineBase *define = fColRegister.GetDefine(col);
121
122 const bool convertVector2RVec = true;
123 return RDFInternal::ColumnName2ColumnTypeName(std::string(col), nullptr, fLoopManager->GetDataSource(), define,
125}
126
/////////////////////////////////////////////////////////////////////////////
/// \brief Return information about the dataframe.
/// \return information about the dataframe as RDFDescription object
///
/// This convenience function describes the dataframe and combines the following information:
/// - Number of event loops run, see GetNRuns()
/// - Number of total and defined columns, see GetColumnNames() and GetDefinedColumnNames()
/// - Column names, see GetColumnNames()
/// - Column types, see GetColumnType()
/// - Number of processing slots, see GetNSlots()
///
/// This is not an action nor a transformation, just a query to the RDataFrame object.
/// The result is dependent on the node from which this method is called, e.g. the list of
/// defined columns returned by GetDefinedColumnNames().
///
/// Please note that this is a convenience feature: the layout of the output is subject
/// to change, so it should be accessed via RDFDescription methods rather than parsed as text.
///
/// ### Example usage:
/// ~~~{.cpp}
/// RDataFrame df(10);
/// auto df2 = df.Define("x", "1.f").Define("s", "\"myStr\"");
/// // Describe the dataframe
/// df2.Describe().Print();
/// df2.Describe().Print(/*shortFormat=*/true);
/// std::cout << df2.Describe().AsString() << std::endl;
/// std::cout << df2.Describe().AsString(/*shortFormat=*/true) << std::endl;
/// ~~~
///
157{
158 // Build set of defined column names to find later in all column names
159 // the defined columns more efficiently
160 const auto columnNames = GetColumnNames();
161 std::set<std::string> definedColumnNamesSet;
162 for (const auto &name : GetDefinedColumnNames())
164
165 // Get information for the metadata table
166 const std::vector<std::string> metadataProperties = {"Columns in total", "Columns from defines", "Event loops run",
167 "Processing slots"};
168 const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
169 std::to_string(definedColumnNamesSet.size()),
170 std::to_string(GetNRuns()), std::to_string(GetNSlots())};
171
172 // Set header for metadata table
174 // The column width of the values is required to make right-bound numbers and is equal
175 // to the maximum of the string "Value" and all values to be put in this column.
176 const auto columnWidthValues =
177 std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(), static_cast<std::size_t>(5u));
178 std::stringstream ss;
179 ss << std::left << std::setw(columnWidthProperties) << "Property" << std::setw(columnWidthValues) << "Value\n"
180 << std::setw(columnWidthProperties) << "--------" << std::setw(columnWidthValues) << "-----\n";
181
182 // Build metadata table
183 // All numbers should be bound to the right and strings bound to the left.
184 for (auto i = 0u; i < metadataProperties.size(); i++) {
185 ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
186 << std::setw(columnWidthValues) << metadataValues[i] << '\n';
187 }
188 ss << '\n'; // put space between this and the next table
189
190 // Set header for columns table
192 const auto columnTypes = GetColumnTypeNamesList(columnNames);
194 ss << std::left << std::setw(columnWidthNames) << "Column" << std::setw(columnWidthTypes) << "Type"
195 << "Origin\n"
196 << std::setw(columnWidthNames) << "------" << std::setw(columnWidthTypes) << "----"
197 << "------\n";
198
199 // Build columns table
200 const auto nCols = columnNames.size();
201 for (auto i = 0u; i < nCols; i++) {
202 auto origin = "Dataset";
204 origin = "Define";
205 ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes) << columnTypes[i]
206 << origin << '\n';
207 }
208 // Use the string returned from DescribeDataset() as the 'brief' description
209 // Use the converted to string stringstream ss as the 'full' description
210 return RDFDescription(DescribeDataset(), ss.str(), GetNFiles());
211}
212
213/// \brief Returns the names of the defined columns.
214/// \return the container of the defined column names.
215///
216/// This is not an action nor a transformation, just a simple utility to
217/// get the columns names that have been defined up to the node.
218/// If no column has been defined, e.g. on a root node, it returns an
219/// empty collection.
220///
221/// ### Example usage:
222/// ~~~{.cpp}
223/// auto defColNames = d.GetDefinedColumnNames();
224/// // Print defined columns' names
225/// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
226/// ~~~
227///
229{
231
232 const auto columns = fColRegister.BuildDefineNames();
233 for (const auto &column : columns) {
235 definedColumns.emplace_back(column);
236 }
237
238 return definedColumns;
239}
240
241/// \brief Return a descriptor for the systematic variations registered in this branch of the computation graph.
242///
243/// This is not an action nor a transformation, just a simple utility to
244/// inspect the systematic variations that have been registered with Vary() up to this node.
245/// When called on the root node, it returns an empty descriptor.
246///
247/// ### Example usage:
248/// ~~~{.cpp}
249/// auto variations = d.GetVariations();
250/// variations.Print();
251/// ~~~
252///
254{
255 return fColRegister.BuildVariationsDescription();
256}
257
258/// \brief Checks if a column is present in the dataset.
259/// \return true if the column is available, false otherwise
260///
261/// This method checks if a column is part of the input ROOT dataset, has
262/// been defined or can be provided by the data source.
263///
264/// Example usage:
265/// ~~~{.cpp}
266/// ROOT::RDataFrame base(1);
267/// auto rdf = base.Define("definedColumn", [](){return 0;});
268/// rdf.HasColumn("definedColumn"); // true: we defined it
269/// rdf.HasColumn("rdfentry_"); // true: it's always there
270/// rdf.HasColumn("foo"); // false: it is not there
271/// ~~~
273{
274 if (fColRegister.IsDefineOrAlias(columnName))
275 return true;
276
277 if (auto ds = GetDataSource(); ds->HasColumn(columnName))
278 return true;
279
280 return false;
281}
282
/// \brief Gets the number of data processing slots.
/// \return The number of data processing slots used by this RDataFrame instance
///
/// This method returns the number of data processing slots used by this RDataFrame
/// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
///
/// Example usage:
/// ~~~{.cpp}
/// ROOT::EnableImplicitMT(6);
/// ROOT::RDataFrame df(1);
/// std::cout << df.GetNSlots() << std::endl; // prints "6"
/// ~~~
296{
297 return fLoopManager->GetNSlots();
298}
299
/// \brief Gets the number of event loops run.
/// \return The number of event loops run by this RDataFrame instance
///
/// This method returns the number of event loops run so far by this RDataFrame instance.
///
/// Example usage:
/// ~~~{.cpp}
/// ROOT::RDataFrame df(1);
/// std::cout << df.GetNRuns() << std::endl; // prints "0"
/// df.Sum("rdfentry_").GetValue(); // trigger the event loop
/// std::cout << df.GetNRuns() << std::endl; // prints "1"
/// df.Sum("rdfentry_").GetValue(); // trigger another event loop
/// std::cout << df.GetNRuns() << std::endl; // prints "2"
/// ~~~
315{
316 return fLoopManager->GetNRuns();
317}
318
320{
321 std::vector<std::string> types;
322
323 for (auto column : columnList) {
324 types.push_back(GetColumnType(column));
325 }
326 return types;
327}
328
330{
332 std::string error(callerName);
333 error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
334 throw std::runtime_error(error);
335 }
336}
337
339{
340 // Entry number column
341 const std::string entryColName = "rdfentry_";
342 const std::string entryColType = "ULong64_t";
343 auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
344 using NewColEntry_t = RDFDetail::RDefine<decltype(entryColGen), RDFDetail::ExtraArgsForDefine::SlotAndEntry>;
345
346 auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
347 ColumnNames_t{}, fColRegister, *fLoopManager);
348 fColRegister.AddDefine(std::move(entryColumn));
349
350 // Slot number column
351 const std::string slotColName = "rdfslot_";
352 const std::string slotColType = "unsigned int";
353 auto slotColGen = [](unsigned int slot) { return slot; };
354 using NewColSlot_t = RDFDetail::RDefine<decltype(slotColGen), RDFDetail::ExtraArgsForDefine::Slot>;
355
356 auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), ColumnNames_t{},
357 fColRegister, *fLoopManager);
358 fColRegister.AddDefine(std::move(slotColumn));
359
360 fColRegister.AddAlias("tdfentry_", entryColName);
361 fColRegister.AddAlias("tdfslot_", slotColName);
362}
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
char name[80]
Definition TGX11.cxx:110
The head node of a RDF computation graph.
A binder for user-defined columns, variations and aliases.
A DFDescription contains useful information about a given RDataFrame computation graph.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
unsigned int GetNRuns() const
Gets the number of event loops run.
RDataSource * GetDataSource() const
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterfaceBase(std::shared_ptr< RDFDetail::RLoopManager > lm)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
std::string DescribeDataset() const
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
A descriptor for the systematic variations known to a given RDataFrame node.
const_iterator begin() const
const_iterator end() const
const Int_t n
Definition legend1.C:16
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
Definition RDFUtils.cxx:474
std::string DescribeDataset(ROOT::RDF::RDataSource &ds)
Definition RDFUtils.cxx:648
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:312
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:465
const std::vector< std::string > & GetColumnNamesNoDuplicates(const ROOT::RDF::RDataSource &ds)
Definition RDFUtils.cxx:637
std::vector< std::string > ColumnNames_t
Namespace for new ROOT classes and functions.
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:600