Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterfaceBase.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud CERN 08/2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_RINTERFACEBASE
12#define ROOT_RDF_RINTERFACEBASE
13
14#include "ROOT/RVec.hxx"
17#include <ROOT/RDF/RDisplay.hxx>
19#include <ROOT/RDataSource.hxx>
20#include <ROOT/RResultPtr.hxx>
21#include <string_view>
22#include <TError.h> // R__ASSERT
23
24#include <memory>
25#include <set>
26#include <string>
27#include <vector>
28
29namespace ROOT {
30namespace RDF {
31
32class RDFDescription;
33class RVariationsDescription;
34
35using ColumnNames_t = std::vector<std::string>;
36
39
40// clang-format off
41/**
42 * \class ROOT::RDF::RInterfaceBase
43 * \ingroup dataframe
44 * \brief The public interface to the RDataFrame federation of classes.
45 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
46 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
47 *
48 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
49 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
50 */
51// clang-format on
53protected:
54 /// The RLoopManager at the root of this computation graph. Never null.
55 std::shared_ptr<ROOT::Detail::RDF::RLoopManager> fLoopManager;
56
57 /// Contains the columns defined up to this node.
59
60 std::string DescribeDataset() const;
61
63
64 void CheckIMTDisabled(std::string_view callerName);
65
66 void AddDefaultColumns();
67
/// Sanity checks on the arguments of a Vary call.
///
/// \tparam RetType Return type of the Vary expression. A static_assert requires it to satisfy
///         ROOT::Internal::VecOps::IsRVec.
/// \param colNames Names of the columns being varied. Must not be empty.
/// \param variationTags Tags of the variations. Must not be empty.
/// \param variationName Name of the variation. Must not be empty.
///
/// The three non-emptiness requirements are enforced with R__ASSERT.
///
/// \throws std::runtime_error if several columns are varied simultaneously but the expression does not
///         return an RVec of RVecs, if the varied columns do not all have the same type, or if the varied
///         values have a different type than the nominal column.
/// \throws std::logic_error if the same column name is passed more than once in colNames.
68 template <typename RetType>
69 void SanityChecksForVary(const std::vector<std::string> &colNames, const std::vector<std::string> &variationTags,
70 std::string_view variationName)
71 {
72 R__ASSERT(!variationTags.empty() && "Must have at least one variation.");
73 R__ASSERT(!colNames.empty() && "Must have at least one varied column.");
74 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
75
76 for (auto &colName : colNames) {
79 }
81
82 static_assert(ROOT::Internal::VecOps::IsRVec<RetType>::value, "Vary expressions must return an RVec.");
83
84 if (colNames.size() > 1) { // we are varying multiple columns simultaneously, RetType is RVec<RVec<T>>
86 if (!hasInnerRVec)
87 throw std::runtime_error("This Vary call is varying multiple columns simultaneously but the expression "
88 "does not return an RVec of RVecs.");
89
90 // Check for type mismatches. We are interested in two cases:
91 // - All columns that are going to be varied must be of the same type
92 // - The return type of the expression must match the type of the nominal column
94 auto &&nColTypes = colTypes.size();
95 // Cache the type_info of each column as it is resolved, so later iterations can compare against the first one
96 std::vector<const std::type_info *> colTypeIDs(nColTypes);
97 const auto &innerTypeID = typeid(RDFInternal::InnerValueType_t<RetType>);
98 for (decltype(nColTypes) i{}; i < nColTypes; ++i) {
99 // Need to retrieve the type_info for each column. We start with
100 // checking if the column comes from a Define, in which case the
101 // type_info is cached already. Otherwise, we need to retrieve it
102 // via TypeName2TypeID, which eventually might call the interpreter.
103 const auto *define = fColRegister.GetDefine(colNames[i]);
104 colTypeIDs[i] = define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(colTypes[i]);
105 // First check: whether the current column type is the same as the first one.
// colTypeIDs[0] is filled in by the first iteration, so for i == 0 this compares the entry with itself.
106 if (*colTypeIDs[i] != *colTypeIDs[0]) {
107 throw std::runtime_error("Cannot simultaneously vary multiple columns of different types.");
108 }
109 // Second check: mismatch between varied type and nominal type
110 if (innerTypeID != *colTypeIDs[i])
111 throw std::runtime_error("Varied values for column \"" + colNames[i] + "\" have a different type (" +
112 RDFInternal::TypeID2TypeName(innerTypeID) + ") than the nominal value (" +
113 colTypes[i] + ").");
114 }
115
116 } else { // we are varying a single column, RetType is RVec<T>
117 const auto &retTypeID = typeid(typename RetType::value_type);
118 const auto &colName = colNames[0]; // we have only one element in there
119 const auto *define = fColRegister.GetDefine(colName);
// Same lookup strategy as in the multi-column branch: use the Define's type_info if there is one,
// otherwise resolve it from the column type name.
120 const auto *expectedTypeID =
121 define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(GetColumnType(colName));
123 throw std::runtime_error("Varied values for column \"" + colName + "\" have a different type (" +
124 RDFInternal::TypeID2TypeName(retTypeID) + ") than the nominal value (" +
125 GetColumnType(colName) + ").");
126 }
127
128 // when varying multiple columns, they must be different columns
129 if (colNames.size() > 1) {
// A std::set drops duplicates, so a size mismatch means at least one name was repeated.
130 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
131 if (uniqueCols.size() != colNames.size())
132 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
133 }
134 }
135
/// Return the RDataSource pointer reported by the RLoopManager of this computation graph.
137 RDataSource *GetDataSource() const { return fLoopManager->GetDataSource(); }
138
143
144 template <typename... ColumnTypes>
150
151 /// Create RAction object, return RResultPtr for the action.
152 /// Overload for the case in which all column types were specified (no jitting).
153 /// For most actions, `r` and `helperArg` will refer to the same object, because the only argument to forward to
154 /// the action helper is the result value itself. We need the distinction for actions such as Snapshot or Cache,
155 /// for which the constructor arguments of the action helper are different from the returned value.
///
/// \param columns Column names passed by the caller.
/// \param r Result object; handed to MakeResultPtr to build the returned RResultPtr.
/// \param helperArg Argument forwarded to RDFInternal::BuildAction.
/// \param proxiedPtr Node forwarded to RDFInternal::BuildAction.
///
/// The trailing unnamed `int` parameter is ignored by this overload: the number of columns is always
/// `sizeof...(ColTypes)`.
/// \return The RResultPtr produced by MakeResultPtr from `r`, the loop manager and the newly built action.
156 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
158 std::enable_if_t<!RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
159 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
160 const std::shared_ptr<HelperArgType> &helperArg,
161 const std::shared_ptr<RDFNode> &proxiedPtr, const int /*nColumns*/ = -1)
162 {
163 constexpr auto nColumns = sizeof...(ColTypes);
164
167
168 const auto nSlots = fLoopManager->GetNSlots();
169
170 auto action = RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots, proxiedPtr, ActionTag{},
172 return MakeResultPtr(r, *fLoopManager, std::move(action));
173 }
174
175 /// Create RAction object, return RResultPtr for the action.
176 /// Overload for the case in which one or more column types were not specified (RTTI + jitting).
177 /// This overload has a `nColumns` optional argument. If present, the number of required columns for
178 /// this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(ColTypes).
///
/// \param columns Column names passed by the caller.
/// \param r Result object; handed to MakeResultPtr to build the returned RResultPtr.
/// \param helperArg Argument copied to the heap with RDFInternal::MakeSharedOnHeap.
/// \param proxiedPtr Node that is upcast with RDFInternal::UpcastNode and whose GetVariations() result is
///        passed to the RJittedAction constructor.
/// \param nColumns Number of required columns; any value greater than -1 overrides sizeof...(ColTypes).
/// \param vector2RVec NOTE(review): presumably forwarded to the code that builds the string to jit; its use
///        is not visible in this listing, confirm against the full source.
/// \return The RResultPtr produced by MakeResultPtr from `r`, the loop manager and the RJittedAction.
179 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
181 std::enable_if_t<RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
183 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
184 const std::shared_ptr<HelperArgType> &helperArg, const std::shared_ptr<RDFNode> &proxiedPtr,
185 const int nColumns = -1, const bool vector2RVec = true)
186 {
// A caller-provided nColumns (anything greater than -1) takes precedence over the size of the type pack.
187 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(ColTypes));
188
190 const unsigned int nSlots = fLoopManager->GetNSlots();
191
192 auto *tree = fLoopManager->GetTree();
193 auto *helperArgOnHeap = RDFInternal::MakeSharedOnHeap(helperArg);
194
195 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(proxiedPtr));
196
197 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(*fLoopManager, validColumnNames,
198 fColRegister, proxiedPtr->GetVariations());
199 auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
200
204 fLoopManager->ToJitExec(toJit);
// NOTE(review): jittedAction is declared const, so std::move below cannot move from it and the shared_ptr
// is copied instead.
205 return MakeResultPtr(r, *fLoopManager, std::move(jittedAction));
206 }
207
208public:
209 RInterfaceBase(std::shared_ptr<RDFDetail::RLoopManager> lm);
211
213
/// Return the type of a given column as a string.
214 std::string GetColumnType(std::string_view column);
215
217
/// Checks if a column is present in the dataset.
219 bool HasColumn(std::string_view columnName);
/// Gets the number of data processing slots.
221 unsigned int GetNSlots() const;
/// Gets the number of event loops run.
222 unsigned int GetNRuns() const;
223 unsigned int GetNFiles();
224};
225} // namespace RDF
226} // namespace ROOT
227
228#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
The head node of a RDF computation graph.
A binder for user-defined columns, variations and aliases.
RDFDetail::RDefineBase * GetDefine(std::string_view colName) const
Return the RDefine for the requested column name, or nullptr.
A DFDescription contains useful information about a given RDataFrame computation graph.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int nColumns=-1, const bool vector2RVec=true)
Create RAction object, return RResultPtr for the action. Overload for the case in which one or more co...
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > fLoopManager
The RLoopManager at the root of this computation graph. Never null.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
Create RAction object, return RResultPtr for the action. Overload for the case in which all column typ...
unsigned int GetNRuns() const
Gets the number of event loops run.
void SanityChecksForVary(const std::vector< std::string > &colNames, const std::vector< std::string > &variationTags, std::string_view variationName)
RDataSource * GetDataSource() const
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterfaceBase(std::shared_ptr< RDFDetail::RLoopManager > lm)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
std::string DescribeDataset() const
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RDFDetail::RLoopManager * GetLoopManager() const
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
A descriptor for the systematic variations known to a given RDataFrame node.
const_iterator begin() const
const_iterator end() const
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition RDFUtils.cxx:66
void CheckValidCppVarName(std::string_view var, const std::string &where)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:123
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap, const bool vector2RVec)
std::vector< std::string > ColumnNames_t
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Lightweight storage for a collection of types.