Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterfaceBase.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud CERN 08/2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_RINTERFACEBASE
12#define ROOT_RDF_RINTERFACEBASE
13
14#include "ROOT/RVec.hxx"
17#include <ROOT/RDF/RDisplay.hxx>
19#include <ROOT/RDataSource.hxx>
20#include <ROOT/RResultPtr.hxx>
21#include <string_view>
22#include <TError.h> // R__ASSERT
23
24#include <memory>
25#include <set>
26#include <string>
27#include <vector>
28
29namespace ROOT {
30namespace RDF {
31
32class RDFDescription;
33class RVariationsDescription;
34
35using ColumnNames_t = std::vector<std::string>;
36
39
40// clang-format off
41/**
42 * \class ROOT::RDF::RInterfaceBase
43 * \ingroup dataframe
44 * \brief The base public interface to the RDataFrame federation of classes.
45 *
46 * This class contains common methods for all RInterface instantiations.
47 */
48// clang-format on
50protected:
51 /// The RLoopManager at the root of this computation graph. Never null.
52 std::shared_ptr<ROOT::Detail::RDF::RLoopManager> fLoopManager;
53
54 /// Contains the columns defined up to this node.
56
57 std::string DescribeDataset() const;
58
60
61 void CheckIMTDisabled(std::string_view callerName);
62
63 void AddDefaultColumns();
64
/// Validate the arguments of a Vary call. Nothing is returned: the function either asserts/throws or falls through.
/// \tparam RetType Return type of the Vary expression. The static_assert below requires it to be an RVec.
/// \param[in] colNames Names of the columns being varied. Must not be empty (asserted).
/// \param[in] variationTags Tags of the variations. Must not be empty (asserted).
/// \param[in] variationName Name of the variation. Must not be empty (asserted).
/// \throws std::runtime_error if several columns are varied but the expression does not return an RVec of RVecs,
///         if the simultaneously varied columns have different types, or if the varied value type differs from
///         the nominal column type.
/// \throws std::logic_error if the same column name appears more than once in `colNames`.
/// NOTE(review): some statements of this function (the body of the per-column loop, the definitions of
/// `hasInnerRVec` and `colTypes`, the condition guarding the single-column throw) are not visible in this listing.
65 template <typename RetType>
66 void SanityChecksForVary(const std::vector<std::string> &colNames, const std::vector<std::string> &variationTags,
67 std::string_view variationName)
68 {
69 R__ASSERT(!variationTags.empty() && "Must have at least one variation.");
70 R__ASSERT(!colNames.empty() && "Must have at least one varied column.");
71 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
72
73 for (auto &colName : colNames) {
76 }
78
79 static_assert(ROOT::Internal::VecOps::IsRVec<RetType>::value, "Vary expressions must return an RVec.");
80
81 if (colNames.size() > 1) { // we are varying multiple columns simultaneously, RetType is RVec<RVec<T>>
83 if (!hasInnerRVec)
84 throw std::runtime_error("This Vary call is varying multiple columns simultaneously but the expression "
85 "does not return an RVec of RVecs.");
86
87 // Check for type mismatches. We are interested in two cases:
88 // - All columns that are going to be varied must be of the same type
89 // - The return type of the expression must match the type of the nominal column
91 auto &&nColTypes = colTypes.size();
92 // Cache type_info when requested
93 std::vector<const std::type_info *> colTypeIDs(nColTypes);
94 const auto &innerTypeID = typeid(RDFInternal::InnerValueType_t<RetType>);
95 for (decltype(nColTypes) i{}; i < nColTypes; ++i) {
96 // Need to retrieve the type_info for each column. We start with
97 // checking if the column comes from a Define, in which case the
98 // type_info is cached already. Otherwise, we need to retrieve it
99 // via TypeName2TypeID, which eventually might call the interpreter.
100 const auto *define = fColRegister.GetDefine(colNames[i]);
101 colTypeIDs[i] = define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(colTypes[i]);
102 // First check: whether the current column type is the same as the first one.
103 if (*colTypeIDs[i] != *colTypeIDs[0]) {
104 throw std::runtime_error("Cannot simultaneously vary multiple columns of different types.");
105 }
106 // Second check: mismatch between varied type and nominal type
107 if (innerTypeID != *colTypeIDs[i])
108 throw std::runtime_error("Varied values for column \"" + colNames[i] + "\" have a different type (" +
109 RDFInternal::TypeID2TypeName(innerTypeID) + ") than the nominal value (" +
110 colTypes[i] + ").");
111 }
112
113 } else { // we are varying a single column, RetType is RVec<T>
114 const auto &retTypeID = typeid(typename RetType::value_type);
115 const auto &colName = colNames[0]; // we have only one element in there
116 const auto *define = fColRegister.GetDefine(colName);
117 const auto *expectedTypeID =
118 define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(GetColumnType(colName));
120 throw std::runtime_error("Varied values for column \"" + colName + "\" have a different type (" +
121 RDFInternal::TypeID2TypeName(retTypeID) + ") than the nominal value (" +
122 GetColumnType(colName) + ").");
123 }
124
125 // when varying multiple columns, they must be different columns
126 if (colNames.size() > 1) {
127 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
128 if (uniqueCols.size() != colNames.size())
129 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
130 }
131 }
132
/// Return the RDataSource pointer held by the loop manager (forwards to RLoopManager::GetDataSource).
/// The result is null-checked by CheckAndFillDSColumns in this class, so presumably it may be nullptr
/// when there is no data source.
134 RDataSource *GetDataSource() const { return fLoopManager->GetDataSource(); }
135
140
141 template <typename... ColumnTypes>
147
/// If a data source is present, forward the given columns to RDFInternal::AddDSColumns.
/// \param[in] colNames Names of the columns to pass on.
/// \param[in] colTypeIDs type_info pointers passed along with `colNames` (presumably one per column, in the
///            same order -- TODO confirm against AddDSColumns).
/// Does nothing when GetDataSource() returns a null pointer.
148 void CheckAndFillDSColumns(const std::vector<std::string> &colNames,
149 const std::vector<const std::type_info *> &colTypeIDs)
150 {
151 if (auto dataSource = GetDataSource())
152 RDFInternal::AddDSColumns(colNames, *fLoopManager, *dataSource, colTypeIDs, fColRegister);
153 }
154
155 /// Create RAction object, return RResultPtr for the action.
156 /// Overload for the case in which all column types were specified (no jitting).
157 /// For most actions, `r` and `helperArg` will refer to the same object, because the only argument to forward to
158 /// the action helper is the result value itself. We need the distinction for actions such as Snapshot or Cache,
159 /// for which the constructor arguments of the action helper are different from the returned value.
/// \param[in] columns Column names requested by the caller (the statement deriving `validColumnNames` from them
///            is not visible in this listing).
/// \param[in] r Result object that is wrapped into the returned RResultPtr via MakeResultPtr.
/// \param[in] helperArg Argument forwarded to RDFInternal::BuildAction.
/// \param[in] proxiedPtr Node forwarded to RDFInternal::BuildAction.
/// The trailing unnamed `int` parameter is ignored here: the number of columns is sizeof...(ColTypes).
160 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
162 std::enable_if_t<!RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
163 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
164 const std::shared_ptr<HelperArgType> &helperArg,
165 const std::shared_ptr<RDFNode> &proxiedPtr, const int /*nColumns*/ = -1)
166 {
167 constexpr auto nColumns = sizeof...(ColTypes);
168
171
172 const auto nSlots = fLoopManager->GetNSlots();
173
174 auto action = RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots, proxiedPtr, ActionTag{},
176 return MakeResultPtr(r, *fLoopManager, std::move(action));
177 }
178
179 /// Create RAction object, return RResultPtr for the action.
180 /// Overload for the case in which one or more column types were not specified (RTTI + jitting).
181 /// This overload has a `nColumns` optional argument. If present, the number of required columns for
182 /// this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(ColTypes).
/// \param[in] columns Column names requested by the caller (the statement deriving `validColumnNames` from them
///            is not visible in this listing).
/// \param[in] r Result object that is wrapped into the returned RResultPtr via MakeResultPtr.
/// \param[in] helperArg Helper argument; a heap-allocated copy of the shared_ptr is created with MakeSharedOnHeap.
/// \param[in] proxiedPtr Node the action is attached to; it is upcast with UpcastNode and its variations are
///            passed to the RJittedAction constructor.
/// \param[in] nColumns Number of required columns; any value > -1 overrides sizeof...(ColTypes).
/// \param[in] vector2RVec Not used in the visible lines; presumably forwarded to the code-generation call
///            whose beginning is missing from this listing -- TODO confirm.
183 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
185 std::enable_if_t<RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
187 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
188 const std::shared_ptr<HelperArgType> &helperArg, const std::shared_ptr<RDFNode> &proxiedPtr,
189 const int nColumns = -1, const bool vector2RVec = true)
190 {
191 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(ColTypes));
192
194 const unsigned int nSlots = fLoopManager->GetNSlots();
195
196 auto *helperArgOnHeap = RDFInternal::MakeSharedOnHeap(helperArg);
197
198 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(proxiedPtr));
199
200 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(*fLoopManager, validColumnNames,
201 fColRegister, proxiedPtr->GetVariations());
202 auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
203
205 typeid(ActionTag), helperArgOnHeap, nullptr, nSlots, fColRegister,
207 fLoopManager->ToJitExec(toJit);
// NOTE(review): `jittedAction` is declared const above, so std::move yields a const rvalue and the
// shared_ptr is copied rather than moved here.
208 return MakeResultPtr(r, *fLoopManager, std::move(jittedAction));
209 }
210
211public:
212 RInterfaceBase(std::shared_ptr<RDFDetail::RLoopManager> lm);
214
216
/// Return the type of a given column as a string.
217 std::string GetColumnType(std::string_view column);
218
220
/// Checks if a column is present in the dataset.
222 bool HasColumn(std::string_view columnName);
/// Gets the number of data processing slots.
224 unsigned int GetNSlots() const;
/// Gets the number of event loops run.
225 unsigned int GetNRuns() const;
226 unsigned int GetNFiles();
227};
228} // namespace RDF
229} // namespace ROOT
230
231#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
The head node of a RDF computation graph.
A binder for user-defined columns, variations and aliases.
RDFDetail::RDefineBase * GetDefine(std::string_view colName) const
Return the RDefine for the requested column name, or nullptr.
An RDFDescription contains useful information about a given RDataFrame computation graph.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
The base public interface to the RDataFrame federation of classes.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int nColumns=-1, const bool vector2RVec=true)
Create RAction object, return RResultPtr for the action. Overload for the case in which one or more column types were not specified (RTTI + jitting).
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > fLoopManager
The RLoopManager at the root of this computation graph. Never null.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
Create RAction object, return RResultPtr for the action. Overload for the case in which all column types were specified (no jitting).
unsigned int GetNRuns() const
Gets the number of event loops run.
void SanityChecksForVary(const std::vector< std::string > &colNames, const std::vector< std::string > &variationTags, std::string_view variationName)
RDataSource * GetDataSource() const
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterfaceBase(std::shared_ptr< RDFDetail::RLoopManager > lm)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
void CheckAndFillDSColumns(const std::vector< std::string > &colNames, const std::vector< const std::type_info * > &colTypeIDs)
std::string DescribeDataset() const
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RDFDetail::RLoopManager * GetLoopManager() const
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
A descriptor for the systematic variations known to a given RDataFrame node.
const_iterator begin() const
const_iterator end() const
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition RDFUtils.cxx:73
void CheckValidCppVarName(std::string_view var, const std::string &where)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:178
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap, const bool vector2RVec)
std::vector< std::string > ColumnNames_t
Lightweight storage for a collection of types.