Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterfaceBase.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud CERN 08/2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_RINTERFACEBASE
12#define ROOT_RDF_RINTERFACEBASE
13
14#include "ROOT/RVec.hxx"
17#include <ROOT/RDF/RDisplay.hxx>
19#include <ROOT/RDataSource.hxx>
20#include <ROOT/RResultPtr.hxx>
21#include <string_view>
22#include <TError.h> // R__ASSERT
23
24#include <memory>
25#include <set>
26#include <string>
27#include <vector>
28
29namespace ROOT {
30namespace RDF {
31
32class RDFDescription;
33class RVariationsDescription;
34
35using ColumnNames_t = std::vector<std::string>;
36
39
40// clang-format off
41/**
42 * \class ROOT::RDF::RInterfaceBase
43 * \ingroup dataframe
44 * \brief The public interface to the RDataFrame federation of classes.
45 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
46 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
47 *
48 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
49 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
50 */
51// clang-format on
53protected:
54 /// The RLoopManager at the root of this computation graph. Never null.
55 std::shared_ptr<ROOT::Detail::RDF::RLoopManager> fLoopManager;
56
57 /// Contains the columns defined up to this node.
59
60 std::string DescribeDataset() const;
61
63
64 void CheckIMTDisabled(std::string_view callerName);
65
66 void AddDefaultColumns();
67
/// Sanity-check the arguments of a Vary call.
///
/// \tparam RetType Return type of the Vary expression. It must be an RVec (enforced by a static_assert); when
///                 several columns are varied simultaneously it is expected to be an RVec of RVecs.
/// \param[in] colNames Names of the columns being varied. Asserted to be non-empty; duplicates are rejected.
/// \param[in] variationTags Tags of the variations. Asserted to be non-empty.
/// \param[in] variationName Name of the variation. Asserted to be non-empty.
/// \throws std::runtime_error if several columns are varied but the expression does not return an RVec of RVecs,
///         if the varied columns do not all have the same type, or if the varied values have a different type
///         than the nominal column.
/// \throws std::logic_error if the same column name appears more than once in `colNames`.
///
/// NOTE(review): this listing skips some source lines (see the gaps in the line numbers); e.g. the declarations
/// of `hasInnerRVec` and `colTypes` and the condition guarding the single-column throw are not visible here.
68 template <typename RetType>
69 void SanityChecksForVary(const std::vector<std::string> &colNames, const std::vector<std::string> &variationTags,
70 std::string_view variationName)
71 {
 // R__ASSERT reports a fatal error if any of these inputs is empty.
72 R__ASSERT(!variationTags.empty() && "Must have at least one variation.");
73 R__ASSERT(!colNames.empty() && "Must have at least one varied column.");
74 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
75
76 for (auto &colName : colNames) {
79 }
81
 // Compile-time requirement on the expression's return type.
82 static_assert(ROOT::Internal::VecOps::IsRVec<RetType>::value, "Vary expressions must return an RVec.");
83
84 if (colNames.size() > 1) { // we are varying multiple columns simultaneously, RetType is RVec<RVec<T>>
86 if (!hasInnerRVec)
87 throw std::runtime_error("This Vary call is varying multiple columns simultaneously but the expression "
88 "does not return an RVec of RVecs.");
89
90 // Check for type mismatches. We are interested in two cases:
91 // - All columns that are going to be varied must be of the same type
92 // - The return type of the expression must match the type of the nominal column
94 auto &&nColTypes = colTypes.size();
95 // One type_info pointer per varied column, filled in by the loop below.
96 std::vector<const std::type_info *> colTypeIDs(nColTypes);
 // type_info of the innermost value type of the expression's return type; compared against each nominal column.
97 const auto &innerTypeID = typeid(RDFInternal::InnerValueType_t<RetType>);
98 for (decltype(nColTypes) i{}; i < nColTypes; ++i) {
99 // Need to retrieve the type_info for each column. We start with
100 // checking if the column comes from a Define, in which case the
101 // type_info is cached already. Otherwise, we need to retrieve it
102 // via TypeName2TypeID, which eventually might call the interpreter.
103 const auto *define = fColRegister.GetDefine(colNames[i]);
104 colTypeIDs[i] = define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(colTypes[i]);
105 // First check: whether the current column type is the same as the first one.
106 if (*colTypeIDs[i] != *colTypeIDs[0]) {
107 throw std::runtime_error("Cannot simultaneously vary multiple columns of different types.");
108 }
109 // Second check: mismatch between varied type and nominal type
110 if (innerTypeID != *colTypeIDs[i])
111 throw std::runtime_error("Varied values for column \"" + colNames[i] + "\" have a different type (" +
112 RDFInternal::TypeID2TypeName(innerTypeID) + ") than the nominal value (" +
113 colTypes[i] + ").");
114 }
115
116 } else { // we are varying a single column, RetType is RVec<T>
117 const auto &retTypeID = typeid(typename RetType::value_type);
118 const auto &colName = colNames[0]; // we have only one element in there
 // Same lookup strategy as in the multi-column branch: use the Define's type_info if the column comes from a
 // Define, otherwise resolve the type name returned by GetColumnType.
119 const auto *define = fColRegister.GetDefine(colName);
120 const auto *expectedTypeID =
121 define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(GetColumnType(colName));
123 throw std::runtime_error("Varied values for column \"" + colName + "\" have a different type (" +
124 RDFInternal::TypeID2TypeName(retTypeID) + ") than the nominal value (" +
125 GetColumnType(colName) + ").");
126 }
127
128 // when varying multiple columns, they must be different columns
129 if (colNames.size() > 1) {
 // A std::set drops duplicates, so a size mismatch means at least one name was repeated.
130 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
131 if (uniqueCols.size() != colNames.size())
132 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
133 }
134 }
135
/// Return the RDataSource pointer obtained from the loop manager (forwards to fLoopManager->GetDataSource()).
/// Callers in this file null-check the returned pointer before using it.
137 RDataSource *GetDataSource() const { return fLoopManager->GetDataSource(); }
138
143
144 template <typename... ColumnTypes>
150
/// If a data source is available, forward the given column names and their type_info pointers to
/// RDFInternal::AddDSColumns, together with the loop manager, the data source and the column register.
/// Does nothing when GetDataSource() returns a null pointer.
/// \param[in] colNames Names of the columns to process.
/// \param[in] colTypeIDs type_info pointers passed alongside `colNames` (presumably one per column, in the same
///            order -- TODO confirm against AddDSColumns).
151 void CheckAndFillDSColumns(const std::vector<std::string> &colNames,
152 const std::vector<const std::type_info *> &colTypeIDs)
153 {
154 if (auto dataSource = GetDataSource())
155 RDFInternal::AddDSColumns(colNames, *fLoopManager, *dataSource, colTypeIDs, fColRegister);
156 }
157
158 /// Create RAction object, return RResultPtr for the action.
159 /// Overload for the case in which all column types were specified (no jitting).
160 /// For most actions, `r` and `helperArg` will refer to the same object, because the only argument to forward to
161 /// the action helper is the result value itself. We need the distinction for actions such as Snapshot or Cache,
162 /// for which the constructor arguments of the action helper are different from the returned value.
/// The trailing `int` parameter is unnamed and ignored by this overload: the number of columns is always
/// sizeof...(ColTypes).
/// NOTE(review): this listing skips some source lines (see the gaps in the line numbers), e.g. the declaration
/// of `validColumnNames` and the end of the BuildAction call are not visible here.
163 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
165 std::enable_if_t<!RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
166 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
167 const std::shared_ptr<HelperArgType> &helperArg,
168 const std::shared_ptr<RDFNode> &proxiedPtr, const int /*nColumns*/ = -1)
169 {
 // The column count is fixed at compile time by the explicitly specified column types.
170 constexpr auto nColumns = sizeof...(ColTypes);
171
174
175 const auto nSlots = fLoopManager->GetNSlots();
176
177 auto action = RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots, proxiedPtr, ActionTag{},
179 return MakeResultPtr(r, *fLoopManager, std::move(action));
180 }
181
182 /// Create RAction object, return RResultPtr for the action.
183 /// Overload for the case in which one or more column types were not specified (RTTI + jitting).
184 /// This overload has a `nColumns` optional argument. If present, the number of required columns for
185 /// this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(ColTypes).
/// NOTE(review): this listing skips some source lines (see the gaps in the line numbers), e.g. the return type,
/// the declaration of `validColumnNames` and the start of the call that produces `toJit` are not visible here.
186 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
188 std::enable_if_t<RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
190 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
191 const std::shared_ptr<HelperArgType> &helperArg, const std::shared_ptr<RDFNode> &proxiedPtr,
192 const int nColumns = -1, const bool vector2RVec = true)
193 {
 // Any nColumns > -1 (including 0) overrides the count implied by the template arguments. The two branches of
 // the conditional are an int and a std::size_t, so realNColumns takes their common type.
194 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(ColTypes));
195
197 const unsigned int nSlots = fLoopManager->GetNSlots();
198
199 auto *helperArgOnHeap = RDFInternal::MakeSharedOnHeap(helperArg);
200
201 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(proxiedPtr));
202
203 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(*fLoopManager, validColumnNames,
204 fColRegister, proxiedPtr->GetVariations());
205 auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
206
 // NOTE(review): presumably the tail of the argument list of RDFInternal::JitBuildAction (nullptr being passed
 // for its TTree* parameter) -- the start of the call is not visible in this listing.
208 typeid(ActionTag), helperArgOnHeap, nullptr, nSlots, fColRegister,
 // Hand the generated code to the loop manager.
210 fLoopManager->ToJitExec(toJit);
 // NOTE(review): jittedAction is declared const, so std::move yields a const rvalue that cannot actually be
 // moved from; consider dropping either the const or the std::move.
211 return MakeResultPtr(r, *fLoopManager, std::move(jittedAction));
212 }
213
214public:
/// Construct the interface from a shared pointer to an RLoopManager.
/// NOTE(review): presumably `lm` is stored in `fLoopManager` -- the definition is not part of this header.
215 RInterfaceBase(std::shared_ptr<RDFDetail::RLoopManager> lm);
217
219
/// Return the type of a given column as a string.
220 std::string GetColumnType(std::string_view column);
221
223
/// Checks if a column is present in the dataset.
225 bool HasColumn(std::string_view columnName);
/// Gets the number of data processing slots.
227 unsigned int GetNSlots() const;
/// Gets the number of event loops run.
228 unsigned int GetNRuns() const;
229 unsigned int GetNFiles();
230};
231} // namespace RDF
232} // namespace ROOT
233
234#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
The head node of a RDF computation graph.
A binder for user-defined columns, variations and aliases.
RDFDetail::RDefineBase * GetDefine(std::string_view colName) const
Return the RDefine for the requested column name, or nullptr.
A DFDescription contains useful information about a given RDataFrame computation graph.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int nColumns=-1, const bool vector2RVec=true)
Create RAction object, return RResultPtr for the action. Overload for the case in which one or more co...
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > fLoopManager
The RLoopManager at the root of this computation graph. Never null.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
Create RAction object, return RResultPtr for the action. Overload for the case in which all column typ...
unsigned int GetNRuns() const
Gets the number of event loops run.
void SanityChecksForVary(const std::vector< std::string > &colNames, const std::vector< std::string > &variationTags, std::string_view variationName)
RDataSource * GetDataSource() const
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterfaceBase(std::shared_ptr< RDFDetail::RLoopManager > lm)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
void CheckAndFillDSColumns(const std::vector< std::string > &colNames, const std::vector< const std::type_info * > &colTypeIDs)
std::string DescribeDataset() const
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RDFDetail::RLoopManager * GetLoopManager() const
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
A descriptor for the systematic variations known to a given RDataFrame node.
const_iterator begin() const
const_iterator end() const
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition RDFUtils.cxx:73
void CheckValidCppVarName(std::string_view var, const std::string &where)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:178
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap, const bool vector2RVec)
std::vector< std::string > ColumnNames_t
Namespace for new ROOT classes and functions.
Lightweight storage for a collection of types.