Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFInterfaceUtils.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
12#include <ROOT/RDataFrame.hxx>
13#include <ROOT/RStringView.hxx>
14#include <ROOT/TSeq.hxx>
15#include <RtypesCore.h>
16#include <TDirectory.h>
17#include <TChain.h>
18#include <TClass.h>
19#include <TClassEdit.h>
20#include <TFriendElement.h>
21#include <TInterpreter.h>
22#include <TObject.h>
23#include <TPRegexp.h>
24#include <TString.h>
25#include <TTree.h>
26
// Pragmas to silence the -Woverloaded-virtual and -Wshadow warnings that would otherwise
// produce a lot of noise when compiling the lexertk header included below
29#if defined(__GNUC__)
30#pragma GCC diagnostic push
31#pragma GCC diagnostic ignored "-Woverloaded-virtual"
32#pragma GCC diagnostic ignored "-Wshadow"
33#endif
34#include "lexertk.hpp"
35#if defined(__GNUC__)
36#pragma GCC diagnostic pop
37#endif
38
39#include <algorithm>
40#include <cassert>
41#include <unordered_set>
42#include <stdexcept>
43#include <string>
44#include <sstream>
45#include <typeinfo>
46
// Forward declarations of RDataFrame implementation types.
namespace ROOT {

namespace Detail {
namespace RDF {
class RLoopManager;
class RDefineBase;
class RFilterBase;
class RRangeBase;
} // namespace RDF
} // namespace Detail

namespace RDF {
class RDataSource;
} // namespace RDF

} // namespace ROOT
62
63namespace {
66
67/// A string expression such as those passed to Filter and Define, digested to a standardized form
68struct ParsedExpression {
69 /// The string expression with the dummy variable names in fVarNames in place of the original column names
70 std::string fExpr;
71 /// The list of valid column names that were used in the original string expression.
72 /// Duplicates are removed and column aliases (created with Alias calls) are resolved.
73 ColumnNames_t fUsedCols;
74 /// The list of variable names used in fExpr, with same ordering and size as fUsedCols
75 ColumnNames_t fVarNames;
76};
77
78/// Look at expression `expr` and return a pair of (column names used, aliases used)
79static std::pair<ColumnNames_t, ColumnNames_t>
80FindUsedColsAndAliases(const std::string &expr, const ColumnNames_t &treeBranchNames,
81 const ROOT::Internal::RDF::RColumnRegister &customColumns,
82 const ColumnNames_t &dataSourceColNames)
83{
84 lexertk::generator tokens;
85 const auto tokensOk = tokens.process(expr);
86 if (!tokensOk) {
87 const auto msg = "Failed to tokenize expression:\n" + expr + "\n\nMake sure it is valid C++.";
88 throw std::runtime_error(msg);
89 }
90
91 std::unordered_set<std::string> usedCols;
92 std::unordered_set<std::string> usedAliases;
93
94 // iterate over tokens in expression and fill usedCols and usedAliases
95 const auto nTokens = tokens.size();
96 const auto kSymbol = lexertk::token::e_symbol;
97 for (auto i = 0u; i < nTokens; ++i) {
98 const auto &tok = tokens[i];
99 // lexertk classifies '&' as e_symbol for some reason
100 if (tok.type != kSymbol || tok.value == "&" || tok.value == "|") {
101 // token is not a potential variable name, skip it
102 continue;
103 }
104
105 ColumnNames_t potentialColNames({tok.value});
106
107 // if token is the start of a dot chain (a.b.c...), a.b, a.b.c etc. are also potential column names
108 auto dotChainKeepsGoing = [&](unsigned int _i) {
109 return _i + 2 <= nTokens && tokens[_i + 1].value == "." && tokens[_i + 2].type == kSymbol;
110 };
111 while (dotChainKeepsGoing(i)) {
112 potentialColNames.emplace_back(potentialColNames.back() + "." + tokens[i + 2].value);
113 i += 2; // consume the tokens we looked at
114 }
115
116 // in an expression such as `a.b`, if `a` is a column alias add it to `usedAliases` and
117 // replace the alias with the real column name in `potentialColNames`.
118 const auto maybeAnAlias = potentialColNames[0]; // intentionally a copy as we'll modify potentialColNames later
119 const auto &resolvedAlias = customColumns.ResolveAlias(maybeAnAlias);
120 if (resolvedAlias != maybeAnAlias) { // this is an alias
121 usedAliases.insert(maybeAnAlias);
122 for (auto &s : potentialColNames)
123 s.replace(0, maybeAnAlias.size(), resolvedAlias);
124 }
125
126 // find the longest potential column name that is an actual column name
127 // (potential columns are sorted by length, so we search from the end to find the longest)
128 auto isRDFColumn = [&](const std::string &col) {
129 if (customColumns.HasName(col) || IsStrInVec(col, treeBranchNames) || IsStrInVec(col, dataSourceColNames))
130 return true;
131 return false;
132 };
133 const auto longestRDFColMatch = std::find_if(potentialColNames.crbegin(), potentialColNames.crend(), isRDFColumn);
134 if (longestRDFColMatch != potentialColNames.crend())
135 usedCols.insert(*longestRDFColMatch);
136 }
137
138 return {{usedCols.begin(), usedCols.end()}, {usedAliases.begin(), usedAliases.end()}};
139}
140
141/// Substitute each '.' in a string with '\.'
142static std::string EscapeDots(const std::string &s)
143{
144 TString out(s);
145 TPRegexp dot("\\.");
146 dot.Substitute(out, "\\.", "g");
147 return std::string(std::move(out));
148}
149
150static TString ResolveAliases(const TString &expr, const ColumnNames_t &usedAliases,
151 const ROOT::Internal::RDF::RColumnRegister &colRegister)
152{
153 TString out(expr);
154
155 for (const auto &alias : usedAliases) {
156 const auto &col = colRegister.ResolveAlias(alias);
157 TPRegexp replacer("\\b" + EscapeDots(alias) + "\\b");
158 replacer.Substitute(out, col, "g");
159 }
160
161 return out;
162}
163
164static ParsedExpression ParseRDFExpression(std::string_view expr, const ColumnNames_t &treeBranchNames,
165 const ROOT::Internal::RDF::RColumnRegister &colRegister,
166 const ColumnNames_t &dataSourceColNames)
167{
168 // transform `#var` into `R_rdf_sizeof_var`
169 TString preProcessedExpr(expr);
170 // match #varname at beginning of the sentence or after not-a-word, but exclude preprocessor directives like #ifdef
171 TPRegexp colSizeReplacer(
172 "(^|\\W)#(?!(ifdef|ifndef|if|else|elif|endif|pragma|define|undef|include|line))([a-zA-Z_][a-zA-Z0-9_]*)");
173 colSizeReplacer.Substitute(preProcessedExpr, "$1R_rdf_sizeof_$3", "g");
174
175 ColumnNames_t usedCols;
176 ColumnNames_t usedAliases;
177 std::tie(usedCols, usedAliases) =
178 FindUsedColsAndAliases(std::string(preProcessedExpr), treeBranchNames, colRegister, dataSourceColNames);
179
180 const auto exprNoAliases = ResolveAliases(preProcessedExpr, usedAliases, colRegister);
181
182 // when we are done, exprWithVars willl be the same as preProcessedExpr but column names will be substituted with
183 // the dummy variable names in varNames
184 TString exprWithVars(exprNoAliases);
185
186 ColumnNames_t varNames(usedCols.size());
187 for (auto i = 0u; i < varNames.size(); ++i)
188 varNames[i] = "var" + std::to_string(i);
189
190 // sort the vector usedColsAndAliases by decreasing length of its elements,
191 // so in case of friends we guarantee we never substitute a column name with another column containing it
192 // ex. without sorting when passing "x" and "fr.x", the replacer would output "var0" and "fr.var0",
193 // because it has already substituted "x", hence the "x" in "fr.x" would be recognized as "var0",
194 // whereas the desired behaviour is handling them as "var0" and "var1"
195 std::sort(usedCols.begin(), usedCols.end(),
196 [](const std::string &a, const std::string &b) { return a.size() > b.size(); });
197 for (const auto &col : usedCols) {
198 const auto varIdx = std::distance(usedCols.begin(), std::find(usedCols.begin(), usedCols.end(), col));
199 TPRegexp replacer("\\b" + EscapeDots(col) + "\\b");
200 replacer.Substitute(exprWithVars, varNames[varIdx], "g");
201 }
202
203 return ParsedExpression{std::string(std::move(exprWithVars)), std::move(usedCols), std::move(varNames)};
204}
205
/// Give access to the static global map of the Filter/Define lambda expressions jitted so far.
/// The map is used to find out whether an expression has already been jitted and, if so,
/// which jitted variable it is associated with.
/// Keys are the bodies of the expressions, values are the names of the corresponding
/// jitted variables. For example, for:
/// auto lambda1 = [] { return 42; };
/// the key would be "[] { return 42; }" and the value would be "lambda1".
static std::unordered_map<std::string, std::string> &GetJittedExprs()
{
   using ExprToName_t = std::unordered_map<std::string, std::string>;
   static ExprToName_t exprToLambdaName;
   return exprToLambdaName;
}
217
218static std::string
219BuildLambdaString(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
220{
221 assert(vars.size() == varTypes.size());
222
223 TPRegexp re(R"(\breturn\b)");
224 const bool hasReturnStmt = re.MatchB(expr);
225
226 static const std::vector<std::string> fundamentalTypes = {
227 "int",
228 "signed",
229 "signed int",
230 "Int_t",
231 "unsigned",
232 "unsigned int",
233 "UInt_t",
234 "double",
235 "Double_t",
236 "float",
237 "Float_t",
238 "char",
239 "Char_t",
240 "unsigned char",
241 "UChar_t",
242 "bool",
243 "Bool_t",
244 "short",
245 "short int",
246 "Short_t",
247 "long",
248 "long int",
249 "long long int",
250 "Long64_t",
251 "unsigned long",
252 "unsigned long int",
253 "ULong64_t",
254 "std::size_t",
255 "size_t",
256 "Ssiz_t"
257 };
258
259 std::stringstream ss;
260 ss << "[](";
261 for (auto i = 0u; i < vars.size(); ++i) {
262 std::string fullType;
263 const auto &type = varTypes[i];
264 if (std::find(fundamentalTypes.begin(), fundamentalTypes.end(), type) != fundamentalTypes.end()) {
265 // pass it by const value to help detect common mistakes such as if(x = 3)
266 fullType = "const " + type + " ";
267 } else {
268 // We pass by reference to avoid expensive copies
269 // It can't be const reference in general, as users might want/need to call non-const methods on the values
270 fullType = type + "& ";
271 }
272 ss << fullType << vars[i] << ", ";
273 }
274 if (!vars.empty())
275 ss.seekp(-2, ss.cur);
276
277 if (hasReturnStmt)
278 ss << "){";
279 else
280 ss << "){return ";
281 ss << expr << "\n;}";
282
283 return ss.str();
284}
285
286/// Declare a lambda expression to the interpreter in namespace R_rdf, return the name of the jitted lambda.
287/// If the lambda expression is already in GetJittedExprs, return the name for the lambda that has already been jitted.
288static std::string DeclareLambda(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
289{
291
292 const auto lambdaExpr = BuildLambdaString(expr, vars, varTypes);
293 auto &exprMap = GetJittedExprs();
294 const auto exprIt = exprMap.find(lambdaExpr);
295 if (exprIt != exprMap.end()) {
296 // expression already there
297 const auto lambdaName = exprIt->second;
298 return lambdaName;
299 }
300
301 // new expression
302 const auto lambdaBaseName = "lambda" + std::to_string(exprMap.size());
303 const auto lambdaFullName = "R_rdf::" + lambdaBaseName;
304
305 const auto toDeclare = "namespace R_rdf {\nauto " + lambdaBaseName + " = " + lambdaExpr + ";\nusing " +
306 lambdaBaseName + "_ret_t = typename ROOT::TypeTraits::CallableTraits<decltype(" +
307 lambdaBaseName + ")>::ret_type;\n}";
309
310 // InterpreterDeclare could throw. If it doesn't, mark the lambda as already jitted
311 exprMap.insert({lambdaExpr, lambdaFullName});
312
313 return lambdaFullName;
314}
315
316/// Each jitted lambda comes with a lambda_ret_t type alias for its return type.
317/// Resolve that alias and return the true type as string.
318static std::string RetTypeOfLambda(const std::string &lambdaName)
319{
320 const auto dt = gROOT->GetType((lambdaName + "_ret_t").c_str());
321 R__ASSERT(dt != nullptr);
322 const auto type = dt->GetFullTypeName();
323 return type;
324}
325
326static void GetTopLevelBranchNamesImpl(TTree &t, std::set<std::string> &bNamesReg, ColumnNames_t &bNames,
327 std::set<TTree *> &analysedTrees, const std::string friendName = "")
328{
329 if (!analysedTrees.insert(&t).second) {
330 return;
331 }
332
333 auto branches = t.GetListOfBranches();
334 if (branches) {
335 for (auto branchObj : *branches) {
336 const auto name = branchObj->GetName();
337 if (bNamesReg.insert(name).second) {
338 bNames.emplace_back(name);
339 } else if (!friendName.empty()) {
340 // If this is a friend and the branch name has already been inserted, it might be because the friend
341 // has a branch with the same name as a branch in the main tree. Let's add it as <friendname>.<branchname>.
342 // If used for a Snapshot, this name will become <friendname>_<branchname> (with an underscore).
343 const auto longName = friendName + "." + name;
344 if (bNamesReg.insert(longName).second)
345 bNames.emplace_back(longName);
346 }
347 }
348 }
349
350 auto friendTrees = t.GetListOfFriends();
351
352 if (!friendTrees)
353 return;
354
355 for (auto friendTreeObj : *friendTrees) {
356 auto friendElement = static_cast<TFriendElement *>(friendTreeObj);
357 auto friendTree = friendElement->GetTree();
358 const std::string frName(friendElement->GetName()); // this gets us the TTree name or the friend alias if any
359 GetTopLevelBranchNamesImpl(*friendTree, bNamesReg, bNames, analysedTrees, frName);
360 }
361}
362
363} // anonymous namespace
364
365namespace ROOT {
366namespace Internal {
367namespace RDF {
368
369/// Take a list of column names, return that list with entries starting by '#' filtered out.
370/// The function throws when filtering out a column this way.
371ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
372{
373 ColumnNames_t columnListWithoutSizeColumns;
374 ColumnNames_t filteredColumns;
375 std::copy_if(columnNames.begin(), columnNames.end(), std::back_inserter(columnListWithoutSizeColumns),
376 [&](const std::string &name) {
377 if (name[0] == '#') {
378 filteredColumns.emplace_back(name);
379 return false;
380 } else {
381 return true;
382 }
383 });
384
385 if (!filteredColumns.empty()) {
386 std::string msg = "Column name(s) {";
387 for (auto &c : filteredColumns)
388 msg += c + ", ";
389 msg[msg.size() - 2] = '}';
390 msg += "will be ignored. Please go through a valid Alias to " + action + " an array size column";
391 throw std::runtime_error(msg);
392 }
393
394 return columnListWithoutSizeColumns;
395}
396
/// Resolve the column name `col` against `aliasMap`.
/// \return The value registered for `col` in `aliasMap` if there is one; otherwise `R_rdf_sizeof_var`
///         if `col` has the form `#var`; otherwise `col` itself.
std::string ResolveAlias(const std::string &col, const std::map<std::string, std::string> &aliasMap)
{
   const auto aliasEntry = aliasMap.find(col);
   const bool isRegisteredAlias = aliasEntry != aliasMap.end();
   if (isRegisteredAlias)
      return aliasEntry->second;

   // #var is an alias for R_rdf_sizeof_var
   const bool isSizeOfColumn = col.size() > 1 && col[0] == '#';
   return isSizeOfColumn ? "R_rdf_sizeof_" + col.substr(1) : col;
}
409
/// Check that `var` can be used as the name of a C++ variable: it must be non-empty, start with a letter
/// or an underscore and only contain letters, digits and underscores.
/// \param[in] var The name to validate.
/// \param[in] where Name of the calling method, used in the error message.
/// \throws std::runtime_error if `var` is not a valid name.
void CheckValidCppVarName(std::string_view var, const std::string &where)
{
   const auto isALetter = [](char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); };
   const auto isANumber = [](char c) { return c >= '0' && c <= '9'; };
   const auto isValidTok = [&isALetter, &isANumber](char c) { return c == '_' || isALetter(c) || isANumber(c); };

   // the emptiness check comes first: the first character must only be inspected if there is one
   const bool isValid = !var.empty() && (var.front() == '_' || isALetter(var.front())) &&
                        std::all_of(var.begin(), var.end(), isValidTok);

   if (!isValid) {
      const auto error =
         "RDataFrame::" + where + ": cannot define column \"" + std::string(var) + "\". Not a valid C++ variable name.";
      throw std::runtime_error(error);
   }
}
437
438///////////////////////////////////////////////////////////////////////////////
439/// Get all the top-level branches names, including the ones of the friend trees
441{
442 std::set<std::string> bNamesSet;
443 ColumnNames_t bNames;
444 std::set<TTree *> analysedTrees;
445 GetTopLevelBranchNamesImpl(t, bNamesSet, bNames, analysedTrees);
446 return bNames;
447}
448
449std::string DemangleTypeIdName(const std::type_info &typeInfo)
450{
451 int dummy(0);
452 char *tn = TClassEdit::DemangleTypeIdName(typeInfo, dummy);
453 std::string tname(tn);
454 free(tn);
455 return tname;
456}
457
458ColumnNames_t
459ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
460{
461 const auto theRegexSize = columnNameRegexp.size();
462 std::string theRegex(columnNameRegexp);
463
464 const auto isEmptyRegex = 0 == theRegexSize;
465 // This is to avoid cases where branches called b1, b2, b3 are all matched by expression "b"
466 if (theRegexSize > 0 && theRegex[0] != '^')
467 theRegex = "^" + theRegex;
468 if (theRegexSize > 0 && theRegex[theRegexSize - 1] != '$')
469 theRegex = theRegex + "$";
470
471 ColumnNames_t selectedColumns;
472
473 // Since we support gcc48 and it does not provide in its stl std::regex,
474 // we need to use TPRegexp
475 TPRegexp regexp(theRegex);
476 for (auto &&colName : colNames) {
477 if ((isEmptyRegex || regexp.MatchB(colName.c_str())) && !IsInternalColumn(colName)) {
478 selectedColumns.emplace_back(colName);
479 }
480 }
481
482 if (selectedColumns.empty()) {
483 std::string text(callerName);
484 if (columnNameRegexp.empty()) {
485 text = ": there is no column available to match.";
486 } else {
487 text = ": regex \"" + std::string(columnNameRegexp) + "\" did not match any column.";
488 }
489 throw std::runtime_error(text);
490 }
491 return selectedColumns;
492}
493
494/// Throw if column `definedColView` is already there.
495void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols,
496 const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
497{
498 const std::string definedCol(definedColView); // convert to std::string
499
500 std::string error;
501 if (customCols.IsAlias(definedCol))
502 error = "An alias with that name, pointing to column \"" + customCols.ResolveAlias(definedCol) +
503 "\", already exists in this branch of the computation graph.";
504 else if (customCols.HasName(definedCol))
505 error = "A column with that name has already been Define'd. Use Redefine to force redefinition.";
506 // else, check if definedCol is in the list of tree branches. This is a bit better than interrogating the TTree
507 // directly because correct usage of GetBranch, FindBranch, GetLeaf and FindLeaf can be tricky; so let's assume we
508 // got it right when we collected the list of available branches.
509 else if (std::find(treeColumns.begin(), treeColumns.end(), definedCol) != treeColumns.end())
510 error =
511 "A branch with that name is already present in the input TTree/TChain. Use Redefine to force redefinition.";
512 else if (std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedCol) != dataSourceColumns.end())
513 error =
514 "A column with that name is already present in the input data source. Use Redefine to force redefinition.";
515
516 if (!error.empty()) {
517 error = "RDataFrame::" + where + ": cannot define column \"" + definedCol + "\". " + error;
518 throw std::runtime_error(error);
519 }
520}
521
522/// Throw if column `definedColView` is _not_ already there.
523void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols,
524 const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
525{
526 const std::string definedCol(definedColView); // convert to std::string
527 std::string error;
528
529 if (customCols.IsAlias(definedCol)) {
530 error = "An alias with that name, pointing to column \"" + customCols.ResolveAlias(definedCol) +
531 "\", already exists. Aliases cannot be Redefined or Varied.";
532 }
533
534 if (error.empty()) {
535 const bool isAlreadyDefined = customCols.HasName(definedCol);
536 // check if definedCol is in the list of tree branches. This is a bit better than interrogating the TTree
537 // directly because correct usage of GetBranch, FindBranch, GetLeaf and FindLeaf can be tricky; so let's assume we
538 // got it right when we collected the list of available branches.
539 const bool isABranch = std::find(treeColumns.begin(), treeColumns.end(), definedCol) != treeColumns.end();
540 const bool isADSColumn =
541 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedCol) != dataSourceColumns.end();
542
543 if (!isAlreadyDefined && !isABranch && !isADSColumn)
544 error = "No column with that name was found in the dataset. Use Define to create a new column.";
545 }
546
547 if (!error.empty()) {
548 error = "RDataFrame::" + where + ": cannot redefine or vary column \"" + definedCol + "\". " + error;
549 throw std::runtime_error(error);
550 }
551}
552
553/// Throw if the column has systematic variations attached.
554void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols)
555{
556 const std::string definedCol(definedColView);
557 const auto &variationDeps = customCols.GetVariationDeps(definedCol);
558 if (!variationDeps.empty()) {
559 const std::string error =
560 "RDataFrame::" + where + ": cannot redefine column \"" + definedCol +
561 "\". The column depends on one or more systematic variations and re-defining varied columns is not supported.";
562 throw std::runtime_error(error);
563 }
564}
565
/// Make sure that the number of template parameters equals the number of column names.
/// \throws std::runtime_error if the two numbers differ.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
{
   if (nTemplateParams == nColumnNames)
      return;

   const auto errMsg = "The number of template parameters specified is " + std::to_string(nTemplateParams) +
                       " while " + std::to_string(nColumnNames) + " columns have been specified.";
   throw std::runtime_error(errMsg);
}
577
578/// Choose between local column names or default column names, throw in case of errors.
579const ColumnNames_t
580SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
581{
582 if (names.empty()) {
583 // use default column names
584 if (defaultNames.size() < nRequiredNames)
585 throw std::runtime_error(
586 std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
587 " required but none were provided and the default list has size " + std::to_string(defaultNames.size()));
588 // return first nRequiredNames default column names
589 return ColumnNames_t(defaultNames.begin(), defaultNames.begin() + nRequiredNames);
590 } else {
591 // use column names provided by the user to this particular transformation/action
592 if (names.size() != nRequiredNames) {
593 auto msg = std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
594 " required but " + std::to_string(names.size()) + (names.size() == 1 ? " was" : " were") +
595 " provided:";
596 for (const auto &name : names)
597 msg += " \"" + name + "\",";
598 msg.back() = '.';
599 throw std::runtime_error(msg);
600 }
601 return names;
602 }
603}
604
605ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns,
606 const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
607{
608 ColumnNames_t unknownColumns;
609 for (auto &column : requiredCols) {
610 const auto isBranch = std::find(datasetColumns.begin(), datasetColumns.end(), column) != datasetColumns.end();
611 if (isBranch)
612 continue;
613 if (definedCols.HasName(column))
614 continue;
615 const auto isDataSourceColumn =
616 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), column) != dataSourceColumns.end();
617 if (isDataSourceColumn)
618 continue;
619 unknownColumns.emplace_back(column);
620 }
621 return unknownColumns;
622}
623
624std::vector<std::string> GetFilterNames(const std::shared_ptr<RLoopManager> &loopManager)
625{
626 return loopManager->GetFiltersNames();
627}
628
629ParsedTreePath ParseTreePath(std::string_view fullTreeName)
630{
631 // split name into directory and treename if needed
632 std::string_view dirName = "";
633 std::string_view treeName = fullTreeName;
634 const auto lastSlash = fullTreeName.rfind('/');
635 if (std::string_view::npos != lastSlash) {
636 dirName = treeName.substr(0, lastSlash);
637 treeName = treeName.substr(lastSlash + 1, treeName.size());
638 }
639 return {std::string(treeName), std::string(dirName)};
640}
641
/// Return the address `addr` formatted as a hexadecimal number with base prefix (e.g. "0x1234").
std::string PrettyPrintAddr(const void *const addr)
{
   // Windows-friendly: print the address as an integer rather than streaming the pointer itself
   const auto addrAsInteger = reinterpret_cast<size_t>(addr);
   std::stringstream formatted;
   formatted << std::hex << std::showbase << addrAsInteger;
   return formatted.str();
}
649
650/// Book the jitting of a Filter call
651std::shared_ptr<RDFDetail::RJittedFilter>
652BookFilterJit(std::shared_ptr<RDFDetail::RNodeBase> *prevNodeOnHeap, std::string_view name, std::string_view expression,
653 const ColumnNames_t &branches, const RColumnRegister &customCols, TTree *tree, RDataSource *ds)
654{
655 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
656
657 const auto parsedExpr = ParseRDFExpression(expression, branches, customCols, dsColumns);
658 const auto exprVarTypes =
659 GetValidatedArgTypes(parsedExpr.fUsedCols, customCols, tree, ds, "Filter", /*vector2rvec=*/true);
660 const auto lambdaName = DeclareLambda(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
661 const auto type = RetTypeOfLambda(lambdaName);
662 if (type != "bool")
663 std::runtime_error("Filter: the following expression does not evaluate to bool:\n" + std::string(expression));
664
665 // definesOnHeap is deleted by the jitted call to JitFilterHelper
667 const auto definesOnHeapAddr = PrettyPrintAddr(definesOnHeap);
668 const auto prevNodeAddr = PrettyPrintAddr(prevNodeOnHeap);
669
670 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(
671 (*prevNodeOnHeap)->GetLoopManagerUnchecked(), name,
672 Union(customCols.GetVariationDeps(parsedExpr.fUsedCols), (*prevNodeOnHeap)->GetVariations()));
673
674 // Produce code snippet that creates the filter and registers it with the corresponding RJittedFilter
675 // Windows requires std::hex << std::showbase << (size_t)pointer to produce notation "0x1234"
676 std::stringstream filterInvocation;
677 filterInvocation << "ROOT::Internal::RDF::JitFilterHelper(" << lambdaName << ", new const char*["
678 << parsedExpr.fUsedCols.size() << "]{";
679 for (const auto &col : parsedExpr.fUsedCols)
680 filterInvocation << "\"" << col << "\", ";
681 if (!parsedExpr.fUsedCols.empty())
682 filterInvocation.seekp(-2, filterInvocation.cur); // remove the last ",
683 // lifetime of pointees:
684 // - jittedFilter: heap-allocated weak_ptr to the actual jittedFilter that will be deleted by JitFilterHelper
685 // - prevNodeOnHeap: heap-allocated shared_ptr to the actual previous node that will be deleted by JitFilterHelper
686 // - definesOnHeap: heap-allocated, will be deleted by JitFilterHelper
687 filterInvocation << "}, " << parsedExpr.fUsedCols.size() << ", \"" << name << "\", "
688 << "reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedFilter>*>("
689 << PrettyPrintAddr(MakeWeakOnHeap(jittedFilter)) << "), "
690 << "reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" << prevNodeAddr << "),"
691 << "reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesOnHeapAddr << ")"
692 << ");\n";
693
694 auto lm = jittedFilter->GetLoopManagerUnchecked();
695 lm->ToJitExec(filterInvocation.str());
696
697 return jittedFilter;
698}
699
700/// Book the jitting of a Define call
701std::shared_ptr<RJittedDefine> BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm,
702 RDataSource *ds, const RColumnRegister &customCols,
703 const ColumnNames_t &branches,
704 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
705{
706 auto *const tree = lm.GetTree();
707 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
708
709 const auto parsedExpr = ParseRDFExpression(expression, branches, customCols, dsColumns);
710 const auto exprVarTypes =
711 GetValidatedArgTypes(parsedExpr.fUsedCols, customCols, tree, ds, "Define", /*vector2rvec=*/true);
712 const auto lambdaName = DeclareLambda(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
713 const auto type = RetTypeOfLambda(lambdaName);
714
715 auto definesCopy = new RColumnRegister(customCols);
716 auto definesAddr = PrettyPrintAddr(definesCopy);
717 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, type, lm, customCols, parsedExpr.fUsedCols);
718
719 std::stringstream defineInvocation;
720 defineInvocation << "ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>("
721 << lambdaName << ", new const char*[" << parsedExpr.fUsedCols.size() << "]{";
722 for (const auto &col : parsedExpr.fUsedCols) {
723 defineInvocation << "\"" << col << "\", ";
724 }
725 if (!parsedExpr.fUsedCols.empty())
726 defineInvocation.seekp(-2, defineInvocation.cur); // remove the last ",
727 // lifetime of pointees:
728 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
729 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
730 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
731 defineInvocation << "}, " << parsedExpr.fUsedCols.size() << ", \"" << name
732 << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
733 << "), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>("
734 << PrettyPrintAddr(MakeWeakOnHeap(jittedDefine))
735 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
736 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
737 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
738
739 lm.ToJitExec(defineInvocation.str());
740 return jittedDefine;
741}
742
743/// Book the jitting of a DefinePerSample call
744std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std::string_view expression,
745 RLoopManager &lm, const RColumnRegister &customCols,
746 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
747{
748 const auto lambdaName = DeclareLambda(std::string(expression), {"rdfslot_", "rdfsampleinfo_"},
749 {"unsigned int", "const ROOT::RDF::RSampleInfo"});
750 const auto retType = RetTypeOfLambda(lambdaName);
751
752 auto definesCopy = new RColumnRegister(customCols);
753 auto definesAddr = PrettyPrintAddr(definesCopy);
754 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, retType, lm, customCols, ColumnNames_t{});
755
756 std::stringstream defineInvocation;
757 defineInvocation << "ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>("
758 << lambdaName << ", nullptr, 0, ";
759 // lifetime of pointees:
760 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
761 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
762 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
763 defineInvocation << "\"" << name << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
764 << "), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>("
765 << PrettyPrintAddr(MakeWeakOnHeap(jittedDefine))
766 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
767 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
768 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
769
770 lm.ToJitExec(defineInvocation.str());
771 return jittedDefine;
772}
773
774/// Book the jitting of a Vary call
775std::shared_ptr<RJittedVariation>
776BookVariationJit(const std::vector<std::string> &colNames, std::string_view variationName,
777 const std::vector<std::string> &variationTags, std::string_view expression, RLoopManager &lm,
778 RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches,
779 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
780{
781 auto *const tree = lm.GetTree();
782 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
783
784 const auto parsedExpr = ParseRDFExpression(expression, branches, colRegister, dsColumns);
785 const auto exprVarTypes =
786 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, tree, ds, "Vary", /*vector2rvec=*/true);
787 const auto lambdaName = DeclareLambda(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
788 const auto type = RetTypeOfLambda(lambdaName);
789
790 if (type.rfind("ROOT::VecOps::RVec", 0) != 0)
791 throw std::runtime_error(
792 "Jitted Vary expressions must return an RVec object. The following expression returns a " + type +
793 " instead:\n" + parsedExpr.fExpr);
794
795 auto colRegisterCopy = new RColumnRegister(colRegister);
796 const auto colRegisterAddr = PrettyPrintAddr(colRegisterCopy);
797 auto jittedVariation = std::make_shared<RJittedVariation>(colNames, variationName, variationTags, type, colRegister,
798 lm, parsedExpr.fUsedCols);
799
800 // build invocation to JitVariationHelper
801 // arrays of strings are passed as const char** plus size.
802 // lifetime of pointees:
803 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
804 // - jittedVariation: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
805 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
806 std::stringstream varyInvocation;
807 varyInvocation << "ROOT::Internal::RDF::JitVariationHelper(" << lambdaName << ", new const char*["
808 << parsedExpr.fUsedCols.size() << "]{";
809 for (const auto &col : parsedExpr.fUsedCols) {
810 varyInvocation << "\"" << col << "\", ";
811 }
812 if (!parsedExpr.fUsedCols.empty())
813 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
814 varyInvocation << "}, " << parsedExpr.fUsedCols.size();
815 varyInvocation << ", new const char*[" << colNames.size() << "]{";
816 for (const auto &col : colNames) {
817 varyInvocation << "\"" << col << "\", ";
818 }
819 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
820 varyInvocation << "}, " << colNames.size() << ", new const char*[" << variationTags.size() << "]{";
821 for (const auto &tag : variationTags) {
822 varyInvocation << "\"" << tag << "\", ";
823 }
824 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
825 varyInvocation << "}, " << variationTags.size() << ", \"" << variationName
826 << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
827 << "), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedVariation>*>("
828 << PrettyPrintAddr(MakeWeakOnHeap(jittedVariation))
829 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << colRegisterAddr
830 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
831 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
832
833 lm.ToJitExec(varyInvocation.str());
834 return jittedVariation;
835}
836
837// Jit and call something equivalent to "this->BuildAndBook<ColTypes...>(params...)"
838// (see comments in the body for actual jitted code)
839std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr<RDFDetail::RNodeBase> *prevNode,
840 const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap,
841 TTree *tree, const unsigned int nSlots, const RColumnRegister &customCols, RDataSource *ds,
842 std::weak_ptr<RJittedAction> *jittedActionOnHeap)
843{
844 // retrieve type of result of the action as a string
845 auto helperArgClass = TClass::GetClass(helperArgType);
846 if (!helperArgClass) {
847 std::string exceptionText = "An error occurred while inferring the result type of an operation.";
848 throw std::runtime_error(exceptionText.c_str());
849 }
850 const auto helperArgClassName = helperArgClass->GetName();
851
852 // retrieve type of action as a string
853 auto actionTypeClass = TClass::GetClass(at);
854 if (!actionTypeClass) {
855 std::string exceptionText = "An error occurred while inferring the action type of the operation.";
856 throw std::runtime_error(exceptionText.c_str());
857 }
858 const std::string actionTypeName = actionTypeClass->GetName();
859 const std::string actionTypeNameBase = actionTypeName.substr(actionTypeName.rfind(':') + 1);
860
861 auto definesCopy = new RColumnRegister(customCols); // deleted in jitted CallBuildAction
862 auto definesAddr = PrettyPrintAddr(definesCopy);
863
864 // Build a call to CallBuildAction with the appropriate argument. When run through the interpreter, this code will
865 // just-in-time create an RAction object and it will assign it to its corresponding RJittedAction.
866 std::stringstream createAction_str;
867 createAction_str << "ROOT::Internal::RDF::CallBuildAction<" << actionTypeName;
868 const auto columnTypeNames =
869 GetValidatedArgTypes(cols, customCols, tree, ds, actionTypeNameBase, /*vector2rvec=*/true);
870 for (auto &colType : columnTypeNames)
871 createAction_str << ", " << colType;
872 // on Windows, to prefix the hexadecimal value of a pointer with '0x',
873 // one need to write: std::hex << std::showbase << (size_t)pointer
874 createAction_str << ">(reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
875 << PrettyPrintAddr(prevNode) << "), new const char*[" << cols.size() << "]{";
876 for (auto i = 0u; i < cols.size(); ++i) {
877 if (i != 0u)
878 createAction_str << ", ";
879 createAction_str << '"' << cols[i] << '"';
880 }
881 createAction_str << "}, " << cols.size() << ", " << nSlots << ", reinterpret_cast<" << helperArgClassName << "*>("
882 << PrettyPrintAddr(helperArgOnHeap)
883 << "), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedAction>*>("
884 << PrettyPrintAddr(jittedActionOnHeap)
885 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr << "));";
886 return createAction_str.str();
887}
888
/// Return true if at least one of the given strings is empty; false otherwise, including for an empty list.
bool AtLeastOneEmptyString(const std::vector<std::string_view> strings)
{
   return std::any_of(strings.begin(), strings.end(), [](std::string_view str) { return str.empty(); });
}
897
898std::shared_ptr<RNodeBase> UpcastNode(std::shared_ptr<RNodeBase> ptr)
899{
900 return ptr;
901}
902
903/// Given the desired number of columns and the user-provided list of columns:
904/// * fallback to using the first nColumns default columns if needed (or throw if nColumns > nDefaultColumns)
905/// * check that selected column names refer to valid branches, custom columns or datasource columns (throw if not)
906/// * replace column names from aliases by the actual column name
907/// Return the list of selected column names.
908ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns,
909 const RColumnRegister &customColumns, RDataSource *ds)
910{
911 auto selectedColumns = SelectColumns(nColumns, columns, lm.GetDefaultColumnNames());
912
913 for (auto &col : selectedColumns) {
914 col = customColumns.ResolveAlias(col);
915 }
916
917 // Complain if there are still unknown columns at this point
918 const auto unknownColumns = FindUnknownColumns(selectedColumns, lm.GetBranchNames(), customColumns,
919 ds ? ds->GetColumnNames() : ColumnNames_t{});
920
921 if (!unknownColumns.empty()) {
922 std::stringstream unknowns;
923 std::string delim = unknownColumns.size() > 1 ? "s: " : ": "; // singular/plural
924 for (auto &unknownColumn : unknownColumns) {
925 unknowns << delim << unknownColumn;
926 delim = ',';
927 }
928 throw std::runtime_error("Unknown column" + unknowns.str());
929 }
930
931 return selectedColumns;
932}
933
934std::vector<std::string> GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister,
935 TTree *tree, RDataSource *ds, const std::string &context,
936 bool vector2rvec)
937{
938 auto toCheckedArgType = [&](const std::string &c) {
939 RDFDetail::RDefineBase *define = colRegister.HasName(c) ? colRegister.GetColumns().at(c).get() : nullptr;
940 const auto colType = ColumnName2ColumnTypeName(c, tree, ds, define, vector2rvec);
941 if (colType.rfind("CLING_UNKNOWN_TYPE", 0) == 0) { // the interpreter does not know this type
942 const auto msg =
943 "The type of custom column \"" + c + "\" (" + colType.substr(19) +
944 ") is not known to the interpreter, but a just-in-time-compiled " + context +
945 " call requires this column. Make sure to create and load ROOT dictionaries for this column's class.";
946 throw std::runtime_error(msg);
947 }
948 return colType;
949 };
950 std::vector<std::string> colTypes;
951 colTypes.reserve(colNames.size());
952 std::transform(colNames.begin(), colNames.end(), std::back_inserter(colTypes), toCheckedArgType);
953 return colTypes;
954}
955
956/// Return a bitset each element of which indicates whether the corresponding element in `selectedColumns` is the
957/// name of a column that must be defined via datasource. All elements of the returned vector are false if no
958/// data-source is present.
959std::vector<bool> FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedCols)
960{
961 const auto nColumns = requestedCols.size();
962 std::vector<bool> mustBeDefined(nColumns, false);
963 for (auto i = 0u; i < nColumns; ++i)
964 mustBeDefined[i] = std::find(definedCols.begin(), definedCols.end(), requestedCols[i]) == definedCols.end();
965 return mustBeDefined;
966}
967
969{
970 std::unordered_set<std::string> uniqueCols;
971 for (auto &col : cols) {
972 if (!uniqueCols.insert(col).second) {
973 const auto msg = "Error: column \"" + col +
974 "\" was passed to Snapshot twice. This is not supported: only one of the columns would be "
975 "readable with RDataFrame.";
976 throw std::logic_error(msg);
977 }
978 }
979}
980
981////////////////////////////////////////////////////////////////////////////////
982/// \brief Trigger the execution of an RDataFrame computation graph.
983/// \param[in] node A node of the computation graph (not a result).
984///
985/// This function calls the RLoopManager::Run method on the \p fLoopManager data
986/// member of the input argument. It is intended for internal use only.
988 node.fLoopManager->Run();
989}
990
991/// Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array branches added
992/// in the right positions (i.e. before the array branches that need them).
993std::pair<std::vector<std::string>, std::vector<std::string>>
994AddSizeBranches(const std::vector<std::string> &branches, TTree *tree, std::vector<std::string> &&colsWithoutAliases,
995 std::vector<std::string> &&colsWithAliases)
996{
997 if (!tree) // nothing to do
998 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
999
1000 assert(colsWithoutAliases.size() == colsWithAliases.size());
1001
1002 auto nCols = colsWithoutAliases.size();
1003 // Use index-iteration as we modify the vector during the iteration.
1004 for (std::size_t i = 0u; i < nCols; ++i) {
1005 const auto &colName = colsWithoutAliases[i];
1006 if (!IsStrInVec(colName, branches))
1007 continue; // this column is not a TTree branch, nothing to do
1008
1009 auto *b = tree->GetBranch(colName.c_str());
1010 if (!b) // try harder
1011 b = tree->FindBranch(colName.c_str());
1012 assert(b != nullptr);
1013 auto *leaves = b->GetListOfLeaves();
1014 if (b->IsA() != TBranch::Class() || leaves->GetEntries() != 1)
1015 continue; // this branch is not a variable-sized array, nothing to do
1016
1017 TLeaf *countLeaf = static_cast<TLeaf *>(leaves->At(0))->GetLeafCount();
1018 if (!countLeaf || IsStrInVec(countLeaf->GetName(), colsWithoutAliases))
1019 continue; // not a variable-sized array or the size branch is already there, nothing to do
1020
1021 // otherwise we must insert the size in colsWithoutAliases _and_ colsWithAliases
1022 colsWithoutAliases.insert(colsWithoutAliases.begin() + i, countLeaf->GetName());
1023 colsWithAliases.insert(colsWithAliases.begin() + i, countLeaf->GetName());
1024 ++nCols;
1025 ++i; // as we inserted an element in the vector we iterate over, we need to move the index forward one extra time
1026 }
1027
1028 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
1029}
1030
1032{
1033 std::set<std::string> uniqueCols;
1034 columnNames.erase(
1035 std::remove_if(columnNames.begin(), columnNames.end(),
1036 [&uniqueCols](const std::string &colName) { return !uniqueCols.insert(colName).second; }),
1037 columnNames.end());
1038}
1039
1040} // namespace RDF
1041} // namespace Internal
1042} // namespace ROOT
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Definition TError.h:118
char name[80]
Definition TGX11.cxx:110
int type
Definition TGX11.cxx:121
R__EXTERN TVirtualMutex * gROOTMutex
Definition TROOT.h:63
#define gROOT
Definition TROOT.h:404
#define R__LOCKGUARD(mutex)
#define free
Definition civetweb.c:1539
The head node of a RDF computation graph.
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the RDataFrame.
A binder for user-defined columns and aliases.
bool IsAlias(const std::string &name) const
Return true if the given column name is an existing alias.
const DefinesMap_t & GetColumns() const
Returns a map of pointers to the defined columns.
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
std::string ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
std::vector< std::string > GetVariationDeps(const std::string &column) const
Get the names of all variations that directly or indirectly affect a given column.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
The public interface to the RDataFrame federation of classes.
RLoopManager * fLoopManager
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2966
virtual const char * GetName() const
Return name of this collection.
A TFriendElement TF describes a TTree object TF in a file.
virtual TTree * GetTree()
Return pointer to friend TTree.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
virtual const char * GetName() const
Returns name of object.
Definition TNamed.h:47
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Definition TPRegexp.h:78
Basic string class.
Definition TString.h:136
A TTree represents a columnar dataset.
Definition TTree.h:79
virtual TObjArray * GetListOfBranches()
Definition TTree.h:485
virtual TList * GetListOfFriends() const
Definition TTree.h:487
TText * text
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Vary call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:224
void RemoveDuplicates(ColumnNames_t &columnNames)
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &customColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition RDFUtils.cxx:419
std::string ResolveAlias(const std::string &col, const std::map< std::string, std::string > &aliasMap)
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
ColumnNames_t GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
Definition Utils.hxx:274
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:365
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
Definition RDFUtils.cxx:317
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &customCols, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(const std::vector< std::string > &branches, TTree *tree, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::vector< bool > FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedCols)
Return a bitset each element of which indicates whether the corresponding element in selectedColumns ...
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols)
Throw if the column has systematic variations attached.
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
void TriggerRun(ROOT::RDF::RNode &node)
Trigger the execution of an RDataFrame computation graph.
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &customCols, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
std::vector< std::string > ColumnNames_t
Definition Utils.hxx:35
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
Definition tree.py:1