Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFColumnRegister.cxx
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
3 * All rights reserved. *
4 * *
5 * For the licensing terms see $ROOTSYS/LICENSE. *
6 * For the list of contributors see $ROOTSYS/README/CREDITS. *
7 *************************************************************************/
8
16#include "ROOT/RDF/Utils.hxx" // IsStrInVec
17
18#include <cassert>
19#include <set>
20
21namespace ROOT {
22namespace Internal {
23namespace RDF {
24
26 : fLoopManager(lm),
27 fVariations(std::make_shared<const VariationsMap_t>()),
28 fDefines(std::make_shared<const DefinesMap_t>()),
29 fAliases(std::make_shared<const AliasesMap_t>())
30{
31 // TODO: The RLoopManager could be taken by reference.
32 // We cannot do it now because it would prevent RColumnRegister from
33 // being copyable, and copies of it are currently made.
34 assert(fLoopManager != nullptr);
35}
36
37////////////////////////////////////////////////////////////////////////////
38/// \brief Return the list of the names of defined columns (no aliases).
39std::vector<std::string_view> RColumnRegister::BuildDefineNames() const
40{
41 std::vector<std::string_view> names;
42 names.reserve(fDefines->size());
43 for (const auto &kv : *fDefines) {
44 names.push_back(kv.first);
45 }
46 return names;
47}
48
49////////////////////////////////////////////////////////////////////////////
50/// \brief Return the RDefine for the requested column name, or nullptr.
51RDFDetail::RDefineBase *RColumnRegister::GetDefine(std::string_view colName) const
52{
53 auto it = std::find_if(fDefines->begin(), fDefines->end(),
54 [&colName](const DefinesMap_t::value_type &kv) { return kv.first == colName; });
55 return it == fDefines->end() ? nullptr : &it->second->GetDefine();
56}
57
58////////////////////////////////////////////////////////////////////////////
59/// \brief Check if the provided name is tracked in the names list
60bool RColumnRegister::IsDefineOrAlias(std::string_view name) const
61{
62 return IsDefine(name) || IsAlias(name);
63}
64
65////////////////////////////////////////////////////////////////////////////
66/// \brief Add a new defined column.
67/// Registers the pair (columnName, columnReader) with the current RDataFrame,
68/// then keeps a reference to the inserted objects to keep track of the
69/// available columns for this node. Internally it recreates the collection with
70/// the new column, and swaps it with the old one.
71void RColumnRegister::AddDefine(std::shared_ptr<RDFDetail::RDefineBase> define)
72{
73 const auto colIt = fLoopManager->GetColumnNamesCache().Insert(define->GetName());
74 auto insertion_defs = fLoopManager->GetUniqueDefinesWithReaders().insert(
75 {*colIt, std::make_unique<ROOT::Internal::RDF::RDefinesWithReaders>(define, fLoopManager->GetNSlots(),
77
78 auto newDefines = std::make_shared<DefinesMap_t>(*fDefines);
79 // Structured bindings cannot be used in lambda captures (until C++20)
80 // so we explicitly define variable names here
81 const auto &colAndDefIt = insertion_defs.first;
82 // If there is a Redefine, we need to reinsert the new pointer to the readers into the same element
83 if (auto previousDefIt =
84 std::find_if(newDefines->begin(), newDefines->end(),
85 [&colAndDefIt](const DefinesMap_t::value_type &kv) { return kv.first == colAndDefIt->first; });
86 previousDefIt != newDefines->end()) {
87 previousDefIt->second = colAndDefIt->second.get();
88 } else {
89 newDefines->push_back({colAndDefIt->first, colAndDefIt->second.get()});
90 }
91 fDefines = std::move(newDefines);
92}
93
94////////////////////////////////////////////////////////////////////////////
95/// \brief Register a new systematic variation.
96void RColumnRegister::AddVariation(std::shared_ptr<RVariationBase> variation)
97{
98 auto newVariations = std::make_shared<VariationsMap_t>(*fVariations);
99 const std::vector<std::string> &colNames = variation->GetColumnNames();
100
101 // Cache column names for this variation and store views for later use
102 for (const auto &colName : colNames) {
103 auto colIt = fLoopManager->GetColumnNamesCache().Insert(colName);
104 auto [colAndVariationsIt, _2] = fLoopManager->GetUniqueVariationsWithReaders().insert(
105 {*colIt, std::make_unique<ROOT::Internal::RDF::RVariationsWithReaders>(variation, fLoopManager->GetNSlots())});
106 newVariations->insert({colAndVariationsIt->first, colAndVariationsIt->second.get()});
107 }
108
109 fVariations = std::move(newVariations);
110}
111
112////////////////////////////////////////////////////////////////////////////
113/// \brief Get the names of the variations that directly provide alternative values for this column.
114std::vector<std::string> RColumnRegister::GetVariationsFor(const std::string &column) const
115{
116 std::vector<std::string> variations;
117 auto range = fVariations->equal_range(column);
118 for (auto it = range.first; it != range.second; ++it)
119 for (const auto &variationName : it->second->GetVariation().GetVariationNames())
120 variations.emplace_back(variationName);
121
122 return variations;
123}
124
125////////////////////////////////////////////////////////////////////////////
126/// \brief Get the names of all variations that directly or indirectly affect a given column.
127///
128/// This list includes variations applied to the column as well as variations applied to other
129/// columns on which the value of this column depends (typically via a Define expression).
130std::vector<std::string> RColumnRegister::GetVariationDeps(const std::string &column) const
131{
132 return GetVariationDeps(std::vector<std::string>{column});
133}
134
135////////////////////////////////////////////////////////////////////////////
136/// \brief Get the names of all variations that directly or indirectly affect the specified columns.
137///
138/// This list includes variations applied to the columns as well as variations applied to other
139/// columns on which the value of any of these columns depend (typically via Define expressions).
140std::vector<std::string> RColumnRegister::GetVariationDeps(const std::vector<std::string> &columns) const
141{
142 // here we assume that columns do not contain aliases, they must have already been resolved
143 std::set<std::string> variationNames;
144
145 for (const auto &col : columns) {
146 const auto &variations = GetVariationsFor(col);
147 for (const auto &var : variations)
148 variationNames.insert(var);
149
150 // For Define'd columns, add the systematic variations they depend on to the set
151 auto defineIt = std::find_if(fDefines->begin(), fDefines->end(),
152 [&col](const DefinesMap_t::value_type &kv) { return kv.first == col; });
153 if (defineIt != fDefines->end()) {
154 for (const auto &v : defineIt->second->GetDefine().GetVariations())
155 variationNames.insert(v);
156 }
157 }
158
159 return {variationNames.begin(), variationNames.end()};
160}
161
162////////////////////////////////////////////////////////////////////////////
163/// \brief Return the RVariationsWithReaders object that handles the specified variation of the specified column, or
164/// null.
166RColumnRegister::FindVariationAndReaders(const std::string &colName, const std::string &variationName)
167{
168 auto range = fVariations->equal_range(colName);
169 if (range.first == fVariations->end())
170 return nullptr;
171 for (auto it = range.first; it != range.second; ++it) {
172 if (IsStrInVec(variationName, it->second->GetVariation().GetVariationNames()))
173 return it->second;
174 }
175
176 return nullptr;
177}
178
180{
181 std::set<const RVariationBase *> uniqueVariations;
182 for (auto &e : *fVariations)
183 uniqueVariations.insert(&e.second->GetVariation());
184
185 const std::vector<const RVariationBase *> variations(uniqueVariations.begin(), uniqueVariations.end());
186 return ROOT::RDF::RVariationsDescription{variations};
187}
188
189////////////////////////////////////////////////////////////////////////////
190/// \brief Add a new alias to the ledger.
191/// Registers the strings alias, colName with the current RDataFrame, then uses
192/// references to those string to create the new pair for the collection of
193/// aliases of this node.
194void RColumnRegister::AddAlias(std::string_view alias, std::string_view colName)
195{
196 // at this point validation of alias and colName has already happened, we trust that
197 // this is a new, valid alias.
198 auto &colNamesCache = fLoopManager->GetColumnNamesCache();
199 auto aliasIt = colNamesCache.Insert(std::string(alias));
200 auto colIt = colNamesCache.Insert(std::string(colName));
201
202 auto newAliases = std::make_shared<AliasesMap_t>(*fAliases);
203 // If an alias was already present we need to substitute it with the new one
204 if (auto previousAliasIt =
205 std::find_if(newAliases->begin(), newAliases->end(),
206 [&aliasIt](const AliasesMap_t::value_type &kv) { return kv.first == *aliasIt; });
207 previousAliasIt != newAliases->end()) {
208 previousAliasIt->second = ResolveAlias(*colIt);
209 } else {
210 newAliases->push_back({*aliasIt, ResolveAlias(*colIt)});
211 }
212
213 fAliases = std::move(newAliases);
214}
215
216////////////////////////////////////////////////////////////////////////////
217/// \brief Return true if the given column name is an existing alias.
218bool RColumnRegister::IsAlias(std::string_view name) const
219{
220 return std::find_if(fAliases->begin(), fAliases->end(),
221 [&name](const AliasesMap_t::value_type &kv) { return kv.first == name; }) != fAliases->end();
222}
223
224////////////////////////////////////////////////////////////////////////////////
225/// \brief Return true if the given column name is an existing defined column.
226bool RColumnRegister::IsDefine(std::string_view name) const
227{
228 return std::find_if(fDefines->begin(), fDefines->end(),
229 [&name](const DefinesMap_t::value_type &kv) { return kv.first == name; }) != fDefines->end();
230}
231
232////////////////////////////////////////////////////////////////////////////
233/// \brief Return the actual column name that the alias resolves to.
234/// Drills through multiple levels of aliasing if needed.
235/// Returns the input in case it's not an alias.
236/// Expands `#%var` to `R_rdf_sizeof_var` (the #%var columns are implicitly-defined aliases).
237std::string_view RColumnRegister::ResolveAlias(std::string_view alias) const
238{
239
240 // #var is an alias for R_rdf_sizeof_var
241 if (alias.size() > 1 && alias[0] == '#') {
242 std::string sizeof_colname{"R_rdf_sizeof_"};
243 sizeof_colname.append(alias.substr(1));
244 auto colIt = fLoopManager->GetColumnNamesCache().Insert(sizeof_colname);
245 return *colIt;
246 }
247
248 if (auto it = std::find_if(fAliases->begin(), fAliases->end(),
249 [&alias](const AliasesMap_t::value_type &kv) { return kv.first == alias; });
250 it != fAliases->end())
251 return it->second;
252
253 return alias; // not an alias, i.e. already resolved
254}
255
256/// Return a RDefineReader or a RVariationReader, or nullptr if not available.
257/// If requestedType does not match the actual type of the Define or Variation, an exception is thrown.
258RDFDetail::RColumnReaderBase *RColumnRegister::GetReader(unsigned int slot, const std::string &colName,
259 const std::string &variationName,
260 const std::type_info &requestedType)
261{
262 // try variations first
263 if (variationName != "nominal") {
264 auto *variationAndReaders = FindVariationAndReaders(colName, variationName);
265 if (variationAndReaders != nullptr) {
266 const auto &actualType = variationAndReaders->GetVariation().GetTypeId();
267 CheckReaderTypeMatches(actualType, requestedType, colName);
268 return &variationAndReaders->GetReader(slot, colName, variationName);
269 }
270 }
271
272 // otherwise try defines
273 auto it = std::find_if(fDefines->begin(), fDefines->end(),
274 [&colName](const DefinesMap_t::value_type &kv) { return kv.first == colName; });
275 if (it != fDefines->end()) {
276 const auto &actualType = it->second->GetDefine().GetTypeId();
277 CheckReaderTypeMatches(actualType, requestedType, colName);
278 return &it->second->GetReader(slot, variationName);
279 }
280
281 return nullptr;
282}
283
284/// Return a RDefineReader or a RVariationReader, or nullptr if not available.
285/// No type checking is done on the requested reader.
287RColumnRegister::GetReaderUnchecked(unsigned int slot, const std::string &colName, const std::string &variationName)
288{
289 // try variations first
290 if (variationName != "nominal") {
291 if (auto *variationAndReaders = FindVariationAndReaders(colName, variationName)) {
292 return &variationAndReaders->GetReader(slot, colName, variationName);
293 }
294 }
295
296 // otherwise try defines
297 if (auto it =
298 std::find_if(fDefines->begin(), fDefines->end(), [&colName](const auto &kv) { return kv.first == colName; });
299 it != fDefines->end()) {
300 return &it->second->GetReader(slot, variationName);
301 }
302
303 return nullptr;
304}
305
306} // namespace RDF
307} // namespace Internal
308} // namespace ROOT
309
311{
312 std::vector<std::string_view> ret;
313 ret.reserve(fDefines->size() + fAliases->size());
314 for (const auto &kv : *fDefines)
315 ret.push_back(kv.first);
316 for (const auto &kv : *fAliases)
317 ret.push_back(kv.first);
318 return ret;
319}
#define e(i)
Definition RSha256.hxx:103
char name[80]
Definition TGX11.cxx:110
#define _2(A, B)
Definition cfortran.h:109
The head node of a RDF computation graph.
std::set< std::pair< std::string_view, std::unique_ptr< ROOT::Internal::RDF::RVariationsWithReaders > > > & GetUniqueVariationsWithReaders()
std::set< std::pair< std::string_view, std::unique_ptr< ROOT::Internal::RDF::RDefinesWithReaders > > > & GetUniqueDefinesWithReaders()
ROOT::Internal::RDF::RStringCache & GetColumnNamesCache()
std::vector< std::string_view > GenerateColumnNames() const
Return the list of the names of the defined columns (Defines + Aliases).
bool IsDefineOrAlias(std::string_view name) const
Check if the provided name is tracked in the names list.
bool IsAlias(std::string_view name) const
Return true if the given column name is an existing alias.
std::vector< std::string_view > BuildDefineNames() const
Return the list of the names of defined columns (no aliases).
std::vector< std::pair< std::string_view, std::string_view > > AliasesMap_t
std::vector< std::string > GetVariationsFor(const std::string &column) const
Get the names of the variations that directly provide alternative values for this column.
std::string_view ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
RDFDetail::RColumnReaderBase * GetReaderUnchecked(unsigned int slot, const std::string &colName, const std::string &variationName)
Return a RDefineReader or a RVariationReader, or nullptr if not available.
std::shared_ptr< const VariationsMap_t > fVariations
Immutable multimap of Variations, can be shared among several nodes.
RDFDetail::RColumnReaderBase * GetReader(unsigned int slot, const std::string &colName, const std::string &variationName, const std::type_info &tid)
Return a RDefineReader or a RVariationReader, or nullptr if not available.
std::shared_ptr< const DefinesMap_t > fDefines
Immutable collection of Defines, can be shared among several nodes.
std::vector< std::pair< std::string_view, ROOT::Internal::RDF::RDefinesWithReaders * > > DefinesMap_t
bool IsDefine(std::string_view name) const
Return true if the given column name is an existing defined column.
void AddVariation(std::shared_ptr< RVariationBase > variation)
Register a new systematic variation.
std::shared_ptr< const AliasesMap_t > fAliases
Immutable map of Aliases, can be shared among several nodes.
void AddDefine(std::shared_ptr< RDFDetail::RDefineBase > column)
Add a new defined column.
RColumnRegister(ROOT::Detail::RDF::RLoopManager *lm)
std::vector< std::string > GetVariationDeps(const std::string &column) const
Get the names of all variations that directly or indirectly affect a given column.
ROOT::RDF::RVariationsDescription BuildVariationsDescription() const
ROOT::Detail::RDF::RLoopManager * fLoopManager
The head node of the computation graph this register belongs to. Never null.
std::unordered_multimap< std::string_view, ROOT::Internal::RDF::RVariationsWithReaders * > VariationsMap_t
void AddAlias(std::string_view alias, std::string_view colName)
Add a new alias to the ledger.
RDFDetail::RDefineBase * GetDefine(std::string_view colName) const
Return the RDefine for the requested column name, or nullptr.
RVariationsWithReaders * FindVariationAndReaders(const std::string &colName, const std::string &variationName)
Return the RVariationsWithReaders object that handles the specified variation of the specified column...
auto Insert(const std::string &string) -> decltype(fStrings)::const_iterator
Inserts the input string in the cache and returns an iterator to the cached string.
Definition RDFUtils.cxx:444
A descriptor for the systematic variations known to a given RDataFrame node.
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition RDFUtils.cxx:439
void CheckReaderTypeMatches(const std::type_info &colType, const std::type_info &requestedType, const std::string &colName)
Definition RDFUtils.cxx:402
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...