Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFColumnRegister.cxx
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
3 * All rights reserved. *
4 * *
5 * For the licensing terms see $ROOTSYS/LICENSE. *
6 * For the list of contributors see $ROOTSYS/README/CREDITS. *
7 *************************************************************************/
8
16#include "ROOT/RDF/Utils.hxx" // IsStrInVec
17
18#include <cassert>
19#include <set>
20
21namespace ROOT {
22namespace Internal {
23namespace RDF {
24
25RDefinesWithReaders::RDefinesWithReaders(std::shared_ptr<RDefineBase> define, unsigned int nSlots)
26 : fDefine(std::move(define)), fReadersPerVariation(nSlots)
27{
28 assert(fDefine != nullptr);
29}
30
31RDefineReader &RDefinesWithReaders::GetReader(unsigned int slot, const std::string &variationName)
32{
33 auto &defineReaders = fReadersPerVariation[slot];
34
35 auto it = defineReaders.find(variationName);
36 if (it != defineReaders.end())
37 return *it->second;
38
39 auto *define = fDefine.get();
40 if (variationName != "nominal")
41 define = &define->GetVariedDefine(variationName);
42
43#if !defined(__clang__) && __GNUC__ >= 7 && __GNUC_MINOR__ >= 3
44 const auto insertion = defineReaders.insert({variationName, std::make_unique<RDefineReader>(slot, *define)});
45 return *insertion.first->second;
46#else
47 // gcc < 7.3 has issues with passing the non-movable std::pair temporary into the insert call
48 auto reader = std::make_unique<RDefineReader>(slot, *define);
49 auto &ret = *reader;
50 defineReaders[variationName] = std::move(reader);
51 return ret;
52#endif
53}
54
55RVariationsWithReaders::RVariationsWithReaders(std::shared_ptr<RVariationBase> variation, unsigned int nSlots)
56 : fVariation(std::move(variation)), fReadersPerVariation(nSlots)
57{
58 assert(fVariation != nullptr);
59}
60
61////////////////////////////////////////////////////////////////////////////
62/// Return a column reader for the given slot, column and variation.
64RVariationsWithReaders::GetReader(unsigned int slot, const std::string &colName, const std::string &variationName)
65{
66 assert(IsStrInVec(variationName, fVariation->GetVariationNames()));
67 assert(IsStrInVec(colName, fVariation->GetColumnNames()));
68
69 auto &varReaders = fReadersPerVariation[slot];
70
71 auto it = varReaders.find(variationName);
72 if (it != varReaders.end())
73 return *it->second;
74
75#if !defined(__clang__) && __GNUC__ >= 7 && __GNUC_MINOR__ >= 3
76 const auto insertion =
77 varReaders.insert({variationName, std::make_unique<RVariationReader>(slot, colName, variationName, *fVariation)});
78 return *insertion.first->second;
79#else
80 // gcc < 7.3 has issues with passing the non-movable std::pair temporary into the insert call
81 auto reader = std::make_unique<RVariationReader>(slot, colName, variationName, *fVariation);
82 auto &ret = *reader;
83 varReaders[variationName] = std::move(reader);
84 return ret;
85#endif
86}
87
88RColumnRegister::RColumnRegister(std::shared_ptr<RDFDetail::RLoopManager> lm)
89 : fLoopManager(lm), fDefines(std::make_shared<DefinesMap_t>()),
90 fAliases(std::make_shared<std::unordered_map<std::string, std::string>>()),
91 fVariations(std::make_shared<VariationsMap_t>()), fColumnNames(std::make_shared<ColumnNames_t>())
92{
93}
94
96{
97 // Explicitly empty the containers to decrement the reference count of the
98 // various shared_ptr's, which might cause destructors to be called. For
99 // this, we need the RLoopManager to stay around and we do not want to rely
100 // on the order during member destruction.
101 fAliases.reset();
102 fDefines.reset();
103 fVariations.reset();
104 fColumnNames.reset();
105}
106
107////////////////////////////////////////////////////////////////////////////
108/// \brief Return the list of the names of defined columns (no aliases).
110{
111 ColumnNames_t names;
112 names.reserve(fDefines->size());
113 for (auto &kv : *fDefines) {
114 names.emplace_back(kv.first);
115 }
116 return names;
117}
118
119////////////////////////////////////////////////////////////////////////////
120/// \brief Return the RDefine for the requested column name, or nullptr.
121RDFDetail::RDefineBase *RColumnRegister::GetDefine(const std::string &colName) const
122{
123 auto it = fDefines->find(colName);
124 return it == fDefines->end() ? nullptr : &it->second->GetDefine();
125}
126
127////////////////////////////////////////////////////////////////////////////
128/// \brief Check if the provided name is tracked in the names list
129bool RColumnRegister::IsDefineOrAlias(std::string_view name) const
130{
131 const auto ccolnamesEnd = fColumnNames->end();
132 return ccolnamesEnd != std::find(fColumnNames->begin(), ccolnamesEnd, name);
133}
134
135////////////////////////////////////////////////////////////////////////////
136/// \brief Add a new defined column.
137/// Internally it recreates the map with the new column, and swaps it with the old one.
138void RColumnRegister::AddDefine(std::shared_ptr<RDFDetail::RDefineBase> define)
139{
140 auto newDefines = std::make_shared<DefinesMap_t>(*fDefines);
141 const std::string &colName = define->GetName();
142
143 // this will assign over a pre-existing element in case this AddDefine is due to a Redefine
144 (*newDefines)[colName] = std::make_shared<RDefinesWithReaders>(define, fLoopManager->GetNSlots());
145
146 fDefines = std::move(newDefines);
147 AddName(colName);
148}
149
150////////////////////////////////////////////////////////////////////////////
151/// \brief Register a new systematic variation.
152void RColumnRegister::AddVariation(std::shared_ptr<RVariationBase> variation)
153{
154 auto newVariations = std::make_shared<VariationsMap_t>(*fVariations);
155 const std::vector<std::string> &colNames = variation->GetColumnNames();
156 for (auto &colName : colNames)
157 newVariations->insert({colName, std::make_shared<RVariationsWithReaders>(variation, fLoopManager->GetNSlots())});
158 fVariations = std::move(newVariations);
159}
160
161////////////////////////////////////////////////////////////////////////////
162/// \brief Get the names of the variations that directly provide alternative values for this column.
163std::vector<std::string> RColumnRegister::GetVariationsFor(const std::string &column) const
164{
165 std::vector<std::string> variations;
166 auto range = fVariations->equal_range(column);
167 for (auto it = range.first; it != range.second; ++it)
168 for (const auto &variationName : it->second->GetVariation().GetVariationNames())
169 variations.emplace_back(variationName);
170
171 return variations;
172}
173
174////////////////////////////////////////////////////////////////////////////
175/// \brief Get the names of all variations that directly or indirectly affect a given column.
176///
177/// This list includes variations applied to the column as well as variations applied to other
178/// columns on which the value of this column depends (typically via a Define expression).
179std::vector<std::string> RColumnRegister::GetVariationDeps(const std::string &column) const
180{
181 return GetVariationDeps(std::vector<std::string>{column});
182}
183
184////////////////////////////////////////////////////////////////////////////
185/// \brief Get the names of all variations that directly or indirectly affect the specified columns.
186///
187/// This list includes variations applied to the columns as well as variations applied to other
188/// columns on which the value of any of these columns depend (typically via Define expressions).
189std::vector<std::string> RColumnRegister::GetVariationDeps(const ColumnNames_t &columns) const
190{
191 // here we assume that columns do not contain aliases, they must have already been resolved
192 std::set<std::string> variationNames;
193
194 for (const auto &col : columns) {
195 const auto &variations = GetVariationsFor(col);
196 for (const auto &var : variations)
197 variationNames.insert(var);
198
199 // For Define'd columns, add the systematic variations they depend on to the set
200 auto defineIt = fDefines->find(col);
201 if (defineIt != fDefines->end()) {
202 for (const auto &v : defineIt->second->GetDefine().GetVariations())
203 variationNames.insert(v);
204 }
205 }
206
207 return {variationNames.begin(), variationNames.end()};
208}
209
210////////////////////////////////////////////////////////////////////////////
211/// \brief Return the RVariationsWithReaders object that handles the specified variation of the specified column, or
212/// null.
214RColumnRegister::FindVariationAndReaders(const std::string &colName, const std::string &variationName)
215{
216 auto range = fVariations->equal_range(colName);
217 if (range.first == fVariations->end())
218 return nullptr;
219 for (auto it = range.first; it != range.second; ++it) {
220 if (IsStrInVec(variationName, it->second->GetVariation().GetVariationNames()))
221 return it->second.get();
222 }
223
224 return nullptr;
225}
226
227ROOT::RDF::RVariationsDescription RColumnRegister::BuildVariationsDescription() const
228{
229 std::set<const RVariationBase *> uniqueVariations;
230 for (auto &e : *fVariations)
231 uniqueVariations.insert(&e.second->GetVariation());
232
233 const std::vector<const RVariationBase *> variations(uniqueVariations.begin(), uniqueVariations.end());
234 return ROOT::RDF::RVariationsDescription{variations};
235}
236
237////////////////////////////////////////////////////////////////////////////
238/// \brief Add a new name to the list returned by `GetNames` without booking a new column.
239///
240/// This is needed because we abuse fColumnNames to also keep track of the aliases defined
241/// in each branch of the computation graph.
242/// Internally it recreates the vector with the new name, and swaps it with the old one.
243void RColumnRegister::AddName(std::string_view name)
244{
245 const auto &names = *fColumnNames;
246 if (std::find(names.begin(), names.end(), name) != names.end())
247 return; // must be a Redefine of an existing column. Nothing to do.
248
249 auto newColsNames = std::make_shared<ColumnNames_t>(names);
250 newColsNames->emplace_back(std::string(name));
251 fColumnNames = newColsNames;
252}
253
254////////////////////////////////////////////////////////////////////////////
255/// \brief Add a new alias to the ledger.
256void RColumnRegister::AddAlias(std::string_view alias, std::string_view colName)
257{
258 // at this point validation of alias and colName has already happened, we trust that
259 // this is a new, valid alias.
260 auto newAliases = std::make_shared<std::unordered_map<std::string, std::string>>(*fAliases);
261 (*newAliases)[std::string(alias)] = ResolveAlias(colName);
262 fAliases = std::move(newAliases);
263 AddName(alias);
264}
265
266////////////////////////////////////////////////////////////////////////////
267/// \brief Return true if the given column name is an existing alias.
268bool RColumnRegister::IsAlias(const std::string &name) const
269{
270 return fAliases->find(name) != fAliases->end();
271}
272
273////////////////////////////////////////////////////////////////////////////
274/// \brief Return the actual column name that the alias resolves to.
275/// Drills through multiple levels of aliasing if needed.
276/// Returns the input in case it's not an alias.
277/// Expands `#%var` to `R_rdf_sizeof_var` (the #%var columns are implicitly-defined aliases).
278std::string RColumnRegister::ResolveAlias(std::string_view alias) const
279{
280 std::string aliasStr{alias};
281
282 // #var is an alias for R_rdf_sizeof_var
283 if (aliasStr.size() > 1 && aliasStr[0] == '#')
284 return "R_rdf_sizeof_" + aliasStr.substr(1);
285
286 auto it = fAliases->find(aliasStr);
287 if (it != fAliases->end())
288 return it->second;
289
290 return aliasStr; // not an alias, i.e. already resolved
291}
292
293/// Return a RDefineReader or a RVariationReader, or nullptr if not available.
294/// If requestedType does not match the actual type of the Define or Variation, an exception is thrown.
295RDFDetail::RColumnReaderBase *RColumnRegister::GetReader(unsigned int slot, const std::string &colName,
296 const std::string &variationName,
297 const std::type_info &requestedType)
298{
299 // try variations first
300 if (variationName != "nominal") {
301 auto *variationAndReaders = FindVariationAndReaders(colName, variationName);
302 if (variationAndReaders != nullptr) {
303 const auto &actualType = variationAndReaders->GetVariation().GetTypeId();
304 CheckReaderTypeMatches(actualType, requestedType, colName);
305 return &variationAndReaders->GetReader(slot, colName, variationName);
306 }
307 }
308
309 // otherwise try defines
310 auto it = fDefines->find(colName);
311 if (it != fDefines->end()) {
312 const auto &actualType = it->second->GetDefine().GetTypeId();
313 CheckReaderTypeMatches(actualType, requestedType, colName);
314 return &it->second->GetReader(slot, variationName);
315 }
316
317 return nullptr;
318}
319
320} // namespace RDF
321} // namespace Internal
322} // namespace ROOT
#define e(i)
Definition RSha256.hxx:103
char name[80]
Definition TGX11.cxx:110
bool IsDefineOrAlias(std::string_view name) const
Check if the provided name is tracked in the names list.
ColumnNames_t BuildDefineNames() const
Return the list of the names of defined columns (no aliases).
std::shared_ptr< const std::unordered_map< std::string, std::string > > fAliases
Immutable map of Aliases, can be shared among several nodes.
RDFDetail::RDefineBase * GetDefine(const std::string &colName) const
Return the RDefine for the requested column name, or nullptr.
std::shared_ptr< const ColumnNames_t > fColumnNames
Names of Defines and Aliases registered so far.
std::unordered_multimap< std::string, std::shared_ptr< RVariationsWithReaders > > VariationsMap_t
See fVariations for more information on this type.
void AddVariation(std::shared_ptr< RVariationBase > variation)
Register a new systematic variation.
std::shared_ptr< VariationsMap_t > fVariations
Immutable multimap of Variations, can be shared among several nodes.
void AddDefine(std::shared_ptr< RDFDetail::RDefineBase > column)
Add a new defined column.
RColumnRegister(const RColumnRegister &)=default
std::shared_ptr< DefinesMap_t > fDefines
Immutable collection of Defines, can be shared among several nodes.
std::shared_ptr< RDFDetail::RLoopManager > fLoopManager
std::vector< std::string > ColumnNames_t
void AddName(std::string_view name)
Add a new name to the list returned by GetNames without booking a new column.
std::unordered_map< std::string, std::shared_ptr< RDefinesWithReaders > > DefinesMap_t
Column reader for defined columns.
std::vector< std::unordered_map< std::string, std::unique_ptr< RDefineReader > > > fReadersPerVariation
RDefineReader & GetReader(unsigned int slot, const std::string &variationName)
std::shared_ptr< RDefineBase > fDefine
RDefinesWithReaders(std::shared_ptr< RDefineBase > define, unsigned int nSlots)
Column reader that reads the value for a specific column, variation and slot.
std::vector< std::unordered_map< std::string, std::unique_ptr< RVariationReader > > > fReadersPerVariation
RVariationsWithReaders(std::shared_ptr< RVariationBase > variation, unsigned int nSlots)
RVariationReader & GetReader(unsigned int slot, const std::string &colName, const std::string &variationName)
Return a column reader for the given slot, column and variation.
std::shared_ptr< RVariationBase > fVariation
A descriptor for the systematic variations known to a given RDataFrame node.
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition RDFUtils.cxx:424
std::string ResolveAlias(const std::string &col, const std::map< std::string, std::string > &aliasMap)
void CheckReaderTypeMatches(const std::type_info &colType, const std::type_info &requestedType, const std::string &colName)
Definition RDFUtils.cxx:384
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...