Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterfaceBase.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud CERN 08/2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_RINTERFACEBASE
12#define ROOT_RDF_RINTERFACEBASE
13
14#include "ROOT/RVec.hxx"
17#include <ROOT/RDF/RDisplay.hxx>
19#include <ROOT/RDataSource.hxx>
20#include <ROOT/RResultPtr.hxx>
21#include <string_view>
22#include <TError.h> // R__ASSERT
23
24#include <memory>
25#include <set>
26#include <string>
27#include <vector>
28
29namespace ROOT {
30namespace RDF {
31
32class RDFDescription;
33class RVariationsDescription;
34
35using ColumnNames_t = std::vector<std::string>;
36
39
40// clang-format off
41/**
42 * \class ROOT::RDF::RInterfaceBase
43 * \ingroup dataframe
44 * \brief The public interface to the RDataFrame federation of classes.
45 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
46 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
47 *
48 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
49 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
50 */
51// clang-format on
53protected:
54 /// The RLoopManager at the root of this computation graph. Never null.
56 /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
58
59 /// Contains the columns defined up to this node.
61
62 std::string DescribeDataset() const;
63
65
66 void CheckIMTDisabled(std::string_view callerName);
67
68 void AddDefaultColumns();
69
70 template <typename RetType>
71 void SanityChecksForVary(const std::vector<std::string> &colNames, const std::vector<std::string> &variationTags,
72 std::string_view variationName)
73 {
74 R__ASSERT(!variationTags.empty() && "Must have at least one variation.");
75 R__ASSERT(!colNames.empty() && "Must have at least one varied column.");
76 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
77
78 for (auto &colName : colNames) {
81 }
82 RDFInternal::CheckValidCppVarName(variationName, "Vary");
83
84 static_assert(ROOT::Internal::VecOps::IsRVec<RetType>::value, "Vary expressions must return an RVec.");
85
86 if (colNames.size() > 1) { // we are varying multiple columns simultaneously, RetType is RVec<RVec<T>>
88 if (!hasInnerRVec)
89 throw std::runtime_error("This Vary call is varying multiple columns simultaneously but the expression "
90 "does not return an RVec of RVecs.");
91
92 auto colTypes = GetColumnTypeNamesList(colNames);
93 auto allColTypesEqual =
94 std::all_of(colTypes.begin() + 1, colTypes.end(), [&](const std::string &t) { return t == colTypes[0]; });
95 if (!allColTypesEqual)
96 throw std::runtime_error("Cannot simultaneously vary multiple columns of different types.");
97
98 const auto &innerTypeID = typeid(RDFInternal::InnerValueType_t<RetType>);
99
100 for (auto i = 0u; i < colTypes.size(); ++i) {
101 const auto *define = fColRegister.GetDefine(colNames[i]);
102 const auto *expectedTypeID = define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(colTypes[i]);
103 if (innerTypeID != *expectedTypeID)
104 throw std::runtime_error("Varied values for column \"" + colNames[i] + "\" have a different type (" +
105 RDFInternal::TypeID2TypeName(innerTypeID) + ") than the nominal value (" +
106 colTypes[i] + ").");
107 }
108 } else { // we are varying a single column, RetType is RVec<T>
109 const auto &retTypeID = typeid(typename RetType::value_type);
110 const auto &colName = colNames[0]; // we have only one element in there
111 const auto *define = fColRegister.GetDefine(colName);
112 const auto *expectedTypeID =
113 define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(GetColumnType(colName));
114 if (retTypeID != *expectedTypeID)
115 throw std::runtime_error("Varied values for column \"" + colName + "\" have a different type (" +
116 RDFInternal::TypeID2TypeName(retTypeID) + ") than the nominal value (" +
117 GetColumnType(colName) + ").");
118 }
119
120 // when varying multiple columns, they must be different columns
121 if (colNames.size() > 1) {
122 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
123 if (uniqueCols.size() != colNames.size())
124 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
125 }
126 }
127
129
130 ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
131 {
133 }
134
135 template <typename... ColumnTypes>
137 {
138 if (fDataSource != nullptr)
139 RDFInternal::AddDSColumns(validCols, *fLoopManager, *fDataSource, typeList, fColRegister);
140 }
141
142 /// Create RAction object, return RResultPtr for the action
143 /// Overload for the case in which all column types were specified (no jitting).
144 /// For most actions, `r` and `helperArg` will refer to the same object, because the only argument to forward to
145 /// the action helper is the result value itself. We need the distinction for actions such as Snapshot or Cache,
146 /// for which the constructor arguments of the action helper are different from the returned value.
147 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
148 typename HelperArgType = ActionResultType,
149 std::enable_if_t<!RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
150 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
151 const std::shared_ptr<HelperArgType> &helperArg,
152 const std::shared_ptr<RDFNode> &proxiedPtr, const int /*nColumns*/ = -1)
153 {
154 constexpr auto nColumns = sizeof...(ColTypes);
155
156 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
158
159 const auto nSlots = fLoopManager->GetNSlots();
160
161 auto action = RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots, proxiedPtr, ActionTag{},
163 return MakeResultPtr(r, *fLoopManager, std::move(action));
164 }
165
166 /// Create RAction object, return RResultPtr for the action
167 /// Overload for the case in which one or more column types were not specified (RTTI + jitting).
168 /// This overload has a `nColumns` optional argument. If present, the number of required columns for
169 /// this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(ColTypes).
170 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
171 typename HelperArgType = ActionResultType,
172 std::enable_if_t<RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
173 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
174 const std::shared_ptr<HelperArgType> &helperArg,
175 const std::shared_ptr<RDFNode> &proxiedPtr, const int nColumns = -1)
176 {
177 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(ColTypes));
178
179 const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
180 const unsigned int nSlots = fLoopManager->GetNSlots();
181
182 auto *tree = fLoopManager->GetTree();
183 auto *helperArgOnHeap = RDFInternal::MakeSharedOnHeap(helperArg);
184
185 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(proxiedPtr));
186
187 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(*fLoopManager, validColumnNames,
188 fColRegister, proxiedPtr->GetVariations());
189 auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
190
191 auto toJit =
192 RDFInternal::JitBuildAction(validColumnNames, upcastNodeOnHeap, typeid(HelperArgType), typeid(ActionTag),
193 helperArgOnHeap, tree, nSlots, fColRegister, fDataSource, jittedActionOnHeap);
194 fLoopManager->ToJitExec(toJit);
195 return MakeResultPtr(r, *fLoopManager, std::move(jittedAction));
196 }
197
198public:
199 RInterfaceBase(std::shared_ptr<RDFDetail::RLoopManager> lm);
201
203
204 std::string GetColumnType(std::string_view column);
205
207
209 bool HasColumn(std::string_view columnName);
211 unsigned int GetNSlots() const;
212 unsigned int GetNRuns() const;
213 unsigned int GetNFiles();
214};
215} // namespace RDF
216} // namespace ROOT
217
218#endif
#define R__ASSERT(e)
Definition TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
virtual const std::type_info & GetTypeId() const =0
The head node of a RDF computation graph.
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
A binder for user-defined columns, variations and aliases.
RDFDetail::RDefineBase * GetDefine(const std::string &colName) const
Return the RDefine for the requested column name, or nullptr.
A DFDescription contains useful information about a given RDataFrame computation graph.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
RDFDetail::RLoopManager * fLoopManager
The RLoopManager at the root of this computation graph. Never null.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which all column typ...
unsigned int GetNRuns() const
Gets the number of event loops run.
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
void SanityChecksForVary(const std::vector< std::string > &colNames, const std::vector< std::string > &variationTags, std::string_view variationName)
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
std::string DescribeDataset() const
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int nColumns=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which one or more co...
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RDFDetail::RLoopManager * GetLoopManager() const
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
Smart pointer for the return type of actions.
A descriptor for the systematic variations known to a given RDataFrame node.
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition RDFUtils.cxx:51
void CheckValidCppVarName(std::string_view var, const std::string &where)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:99
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
std::vector< std::string > ColumnNames_t
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Lightweight storage for a collection of types.