Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RInterfaceBase.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud CERN 08/2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_RINTERFACEBASE
12#define ROOT_RDF_RINTERFACEBASE
13
14#include "ROOT/RVec.hxx"
17#include <ROOT/RDF/RDisplay.hxx>
19#include <ROOT/RDataSource.hxx>
20#include <ROOT/RResultPtr.hxx>
21#include <string_view>
22#include <TError.h> // R__ASSERT
23
24#include <memory>
25#include <set>
26#include <string>
27#include <vector>
28
29namespace ROOT {
30namespace RDF {
31
32class RDFDescription;
34
35using ColumnNames_t = std::vector<std::string>;
36
37namespace RDFDetail = ROOT::Detail::RDF;
39
40// clang-format off
41/**
42 * \class ROOT::RDF::RInterfaceBase
43 * \ingroup dataframe
44 * \brief The base public interface to the RDataFrame federation of classes.
45 *
46 * This class contains common methods for all RInterface instantiations.
47 */
48// clang-format on
50protected:
51 /// The RLoopManager at the root of this computation graph. Never null.
52 std::shared_ptr<ROOT::Detail::RDF::RLoopManager> fLoopManager;
53
54 /// Contains the columns defined up to this node.
56
57 std::string DescribeDataset() const;
58
60
61 void CheckIMTDisabled(std::string_view callerName);
62
63 void AddDefaultColumns();
64
/// Validate the arguments of a Vary call.
///
/// \tparam RetType Return type of the Vary expression. Must be an RVec: RVec<T> when a single column is varied,
///                 RVec<RVec<T>> when several columns are varied simultaneously.
/// \param[in] colNames Names of the columns to vary. Asserted non-empty; must not contain duplicates.
/// \param[in] variationTags Tags of the variations. Asserted non-empty.
/// \param[in] variationName Name of the variation. Asserted non-empty and checked with CheckValidCppVarName.
/// \throws std::runtime_error if several columns are varied but RetType is not an RVec of RVecs, if the varied
///         columns do not all have the same type, or if the varied value type differs from the nominal column type.
/// \throws std::logic_error if the same column name appears more than once in colNames.
65 template <typename RetType>
66 void SanityChecksForVary(const std::vector<std::string> &colNames, const std::vector<std::string> &variationTags,
67 std::string_view variationName)
68 {
69 R__ASSERT(!variationTags.empty() && "Must have at least one variation.");
70 R__ASSERT(!colNames.empty() && "Must have at least one varied column.");
71 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
72
// NOTE(review): the body of this loop (listing lines 74-75) is not visible in this listing.
73 for (auto &colName : colNames) {
76 }
77 RDFInternal::CheckValidCppVarName(variationName, "Vary");
78
79 static_assert(ROOT::Internal::VecOps::IsRVec<RetType>::value, "Vary expressions must return an RVec.");
80
81 if (colNames.size() > 1) { // we are varying multiple columns simultaneously, RetType is RVec<RVec<T>>
// NOTE(review): the declaration of hasInnerRVec (listing line 82) is not visible in this listing.
83 if (!hasInnerRVec)
84 throw std::runtime_error("This Vary call is varying multiple columns simultaneously but the expression "
85 "does not return an RVec of RVecs.");
86
87 // Check for type mismatches. We are interested in two cases:
88 // - All columns that are going to be varied must be of the same type
89 // - The return type of the expression must match the type of the nominal column
90 auto colTypes = GetColumnTypeNamesList(colNames);
91 auto &&nColTypes = colTypes.size();
92 // Cache type_info when requested
93 std::vector<const std::type_info *> colTypeIDs(nColTypes);
94 const auto &innerTypeID = typeid(RDFInternal::InnerValueType_t<RetType>);
95 for (decltype(nColTypes) i{}; i < nColTypes; ++i) {
96 // Need to retrieve the type_info for each column. We start with
97 // checking if the column comes from a Define, in which case the
98 // type_info is cached already. Otherwise, we need to retrieve it
99 // via TypeName2TypeID, which eventually might call the interpreter.
100 const auto *define = fColRegister.GetDefine(colNames[i]);
101 colTypeIDs[i] = define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(colTypes[i]);
102 // First check: whether the current column type is the same as the first one.
103 if (*colTypeIDs[i] != *colTypeIDs[0]) {
104 throw std::runtime_error("Cannot simultaneously vary multiple columns of different types.");
105 }
106 // Second check: mismatch between varied type and nominal type
107 if (innerTypeID != *colTypeIDs[i])
108 throw std::runtime_error("Varied values for column \"" + colNames[i] + "\" have a different type (" +
109 RDFInternal::TypeID2TypeName(innerTypeID) + ") than the nominal value (" +
110 colTypes[i] + ").");
111 }
112
113 } else { // we are varying a single column, RetType is RVec<T>
114 const auto &retTypeID = typeid(typename RetType::value_type);
115 const auto &colName = colNames[0]; // we have only one element in there
116 const auto *define = fColRegister.GetDefine(colName);
// Same lookup strategy as in the multi-column branch: use the Define's type_info if there is one,
// otherwise resolve the column's type name through TypeName2TypeID.
117 const auto *expectedTypeID =
118 define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(GetColumnType(colName));
119 if (retTypeID != *expectedTypeID)
120 throw std::runtime_error("Varied values for column \"" + colName + "\" have a different type (" +
121 RDFInternal::TypeID2TypeName(retTypeID) + ") than the nominal value (" +
122 GetColumnType(colName) + ").");
123 }
124
125 // when varying multiple columns, they must be different columns
126 if (colNames.size() > 1) {
127 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
128 if (uniqueCols.size() != colNames.size())
129 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
130 }
131 }
132
/// Return a raw, non-owning pointer to the RLoopManager held by fLoopManager.
133 RDFDetail::RLoopManager *GetLoopManager() const { return fLoopManager.get(); }
/// Return the RDataSource pointer reported by the loop manager. Callers in this class test the result
/// before dereferencing it, so it may be null.
134 RDataSource *GetDataSource() const { return fLoopManager->GetDataSource(); }
135
/// Return the column names to use for an operation that requires `nColumns` columns, given the
/// user-provided list `columns`.
// NOTE(review): the function body (listing line 138) is not visible in this listing. Presumably it forwards to
// RDFInternal::GetValidatedColumnNames with the loop manager, column register and data source — confirm
// against the upstream header.
136 ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
137 {
139 }
140
/// Overload for column types known at compile time.
/// If a data source is present, forward `validCols`, the loop manager, the data source, `typeList` and
/// fColRegister to RDFInternal::AddDSColumns. Does nothing when there is no data source.
141 template <typename... ColumnTypes>
142 void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList<ColumnTypes...> typeList)
143 {
144 if (auto dataSource = GetDataSource())
145 RDFInternal::AddDSColumns(validCols, *fLoopManager, *dataSource, typeList, fColRegister);
146 }
147
/// Overload for column types described at runtime by `colTypeIDs` (std::type_info pointers).
/// If a data source is present, forward `colNames`, the loop manager, the data source, `colTypeIDs` and
/// fColRegister to RDFInternal::AddDSColumns. Does nothing when there is no data source.
148 void CheckAndFillDSColumns(const std::vector<std::string> &colNames,
149 const std::vector<const std::type_info *> &colTypeIDs)
150 {
151 if (auto dataSource = GetDataSource())
152 RDFInternal::AddDSColumns(colNames, *fLoopManager, *dataSource, colTypeIDs, fColRegister);
153 }
154
155 /// Create RAction object, return RResultPtr for the action.
156 /// Overload for the case in which all column types were specified (no jitting).
157 /// For most actions, `r` and `helperArg` will refer to the same object, because the only argument to forward to
158 /// the action helper is the result value itself. We need the distinction for actions such as Snapshot or Cache,
159 /// for which the constructor arguments of the action helper are different from the returned value.
/// The trailing `int` parameter is unnamed and ignored by this overload: the number of columns is always
/// sizeof...(ColTypes).
160 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
161 typename HelperArgType = ActionResultType,
162 std::enable_if_t<!RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
163 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
164 const std::shared_ptr<HelperArgType> &helperArg,
165 const std::shared_ptr<RDFNode> &proxiedPtr, const int /*nColumns*/ = -1)
166 {
167 constexpr auto nColumns = sizeof...(ColTypes);
168
169 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
// NOTE(review): listing line 170 is not visible in this listing.
171
172 const auto nSlots = fLoopManager->GetNSlots();
173
// NOTE(review): the end of the BuildAction call (listing line 175) is not visible in this listing.
174 auto action = RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots, proxiedPtr, ActionTag{},
176 return MakeResultPtr(r, *fLoopManager, std::move(action));
177 }
178
179 /// Create RAction object, return RResultPtr for the action.
180 /// Overload for the case in which one or more column types were not specified (RTTI + jitting).
181 /// This overload has a `nColumns` optional argument. If present, the number of required columns for
182 /// this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(ColTypes).
/// `vector2RVec` is forwarded unchanged to RDFInternal::JitBuildAction.
183 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
184 typename HelperArgType = ActionResultType,
185 std::enable_if_t<RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
// NOTE(review): the return-type line (listing line 186) is not visible in this listing.
187 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
188 const std::shared_ptr<HelperArgType> &helperArg, const std::shared_ptr<RDFNode> &proxiedPtr,
189 const int nColumns = -1, const bool vector2RVec = true)
190 {
// Any nColumns > -1 (including 0) overrides the size of the ColTypes pack.
191 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(ColTypes));
192
193 const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
194 const unsigned int nSlots = fLoopManager->GetNSlots();
195
196 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(
197 *fLoopManager, validColumnNames, fColRegister, proxiedPtr->GetVariations(), proxiedPtr);
198
199 auto funcBody = RDFInternal::JitBuildAction(validColumnNames, typeid(HelperArgType), typeid(ActionTag), nullptr,
200 nSlots, fColRegister, GetDataSource(), vector2RVec);
// The jitted helper call receives its own copy of the current column register.
201 fLoopManager->RegisterJitHelperCall(funcBody,
202 std::make_unique<ROOT::Internal::RDF::RColumnRegister>(fColRegister),
203 validColumnNames, jittedAction, helperArg);
// NOTE(review): jittedAction is declared const, so std::move(jittedAction) yields a const rvalue and the
// shared_ptr is copied rather than moved. Harmless, but either the const or the std::move should go.
204 return MakeResultPtr(r, *fLoopManager, std::move(jittedAction));
205 }
206
207public:
208 RInterfaceBase(std::shared_ptr<RDFDetail::RLoopManager> lm);
209 RInterfaceBase(RDFDetail::RLoopManager &lm, const RDFInternal::RColumnRegister &colRegister);
210
213
214 std::string GetColumnType(std::string_view column);
215
217
219 bool HasColumn(std::string_view columnName);
221 unsigned int GetNSlots() const;
222 unsigned int GetNRuns() const;
223 unsigned int GetNFiles();
224};
225} // namespace RDF
226} // namespace ROOT
227
228#endif
ROOT::R::TRInterface & r
Definition Object.C:4
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
A binder for user-defined columns, variations and aliases.
An RDFDescription contains useful information about a given RDataFrame computation graph.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int nColumns=-1, const bool vector2RVec=true)
Create RAction object, return RResultPtr for the action. Overload for the case in which one or more co...
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > fLoopManager
The RLoopManager at the root of this computation graph. Never null.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
Create RAction object, return RResultPtr for the action. Overload for the case in which all column typ...
unsigned int GetNRuns() const
Gets the number of event loops run.
ColumnNames_t GetDatasetTopLevelFieldNames()
Retrieve the names of the top-level fields.
void SanityChecksForVary(const std::vector< std::string > &colNames, const std::vector< std::string > &variationTags, std::string_view variationName)
RDataSource * GetDataSource() const
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterfaceBase(std::shared_ptr< RDFDetail::RLoopManager > lm)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
void CheckAndFillDSColumns(const std::vector< std::string > &colNames, const std::vector< const std::type_info * > &colTypeIDs)
std::string DescribeDataset() const
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RDFDetail::RLoopManager * GetLoopManager() const
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
Smart pointer for the return type of actions.
A descriptor for the systematic variations known to a given RDataFrame node.
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition RDFUtils.cxx:86
void CheckValidCppVarName(std::string_view var, const std::string &where)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:191
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::string JitBuildAction(const ColumnNames_t &cols, const std::type_info &helperArgType, const std::type_info &at, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, const bool vector2RVec)
std::vector< std::string > ColumnNames_t
Lightweight storage for a collection of types.