Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFInterfaceUtils.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include <ROOT/RDataSource.hxx>
12#include <ROOT/RTTreeDS.hxx>
15#include <ROOT/RDF/RDisplay.hxx>
21#include <ROOT/RDF/Utils.hxx>
22#include <string_view>
23#include <TBranch.h>
24#include <TClass.h>
25#include <TClassEdit.h>
26#include <TDataType.h>
27#include <TError.h>
28#include <TLeaf.h>
29#include <TObjArray.h>
30#include <TPRegexp.h>
31#include <TROOT.h>
32#include <TString.h>
33#include <TTree.h>
34#include <TVirtualMutex.h>
35
// Temporarily disable the warnings triggered by the lexertk.hpp header included below,
// which is very noisy when compiled.
38#if defined(__GNUC__)
39#pragma GCC diagnostic push
40#pragma GCC diagnostic ignored "-Woverloaded-virtual"
41#pragma GCC diagnostic ignored "-Wshadow"
42#endif
43#include "lexertk.hpp"
44#if defined(__GNUC__)
45#pragma GCC diagnostic pop
46#endif
47
48#include <algorithm>
49#include <cassert>
50#include <cstdlib> // for size_t
51#include <iterator> // for back_insert_iterator
52#include <map>
53#include <memory>
54#include <set>
55#include <sstream>
56#include <stdexcept>
57#include <string>
58#include <typeinfo>
59#include <unordered_map>
60#include <unordered_set>
61#include <utility> // for pair
62#include <vector>
63
64namespace ROOT::Detail::RDF {
65class RDefineBase;
66}
67
68namespace {
71
/// A string expression such as those passed to Filter and Define, digested to a standardized form.
/// Instances are produced by ParseRDFExpression. fUsedCols and fVarNames are parallel lists: the i-th dummy
/// variable name stands in for the i-th used column inside fExpr.
struct ParsedExpression {
   /// The string expression with the dummy variable names in fVarNames in place of the original column names
   std::string fExpr;
   /// The list of valid column names that were used in the original string expression.
   /// Duplicates are removed and column aliases (created with Alias calls) are resolved.
   ColumnNames_t fUsedCols;
   /// The list of variable names used in fExpr, with same ordering and size as fUsedCols
   ColumnNames_t fVarNames;
};
82
83/// Look at expression `expr` and return a pair of (column names used, aliases used)
84std::pair<ColumnNames_t, ColumnNames_t> FindUsedColsAndAliases(const std::string &expr,
86 const ColumnNames_t &dataSourceColNames)
87{
88 lexertk::generator tokens;
89 const auto tokensOk = tokens.process(expr);
90 if (!tokensOk) {
91 const auto msg = "Failed to tokenize expression:\n" + expr + "\n\nMake sure it is valid C++.";
92 throw std::runtime_error(msg);
93 }
94
95 std::unordered_set<std::string> usedCols;
96 std::unordered_set<std::string> usedAliases;
97
98 // iterate over tokens in expression and fill usedCols and usedAliases
99 const auto nTokens = tokens.size();
100 const auto kSymbol = lexertk::token::e_symbol;
101 for (auto i = 0u; i < nTokens; ++i) {
102 const auto &tok = tokens[i];
103 // lexertk classifies '&' as e_symbol for some reason
104 if (tok.type != kSymbol || tok.value == "&" || tok.value == "|") {
105 // token is not a potential variable name, skip it
106 continue;
107 }
108
109 ColumnNames_t potentialColNames({tok.value});
110
111 // if token is the start of a dot chain (a.b.c...), a.b, a.b.c etc. are also potential column names
112 auto dotChainKeepsGoing = [&](unsigned int _i) {
113 return _i + 2 <= nTokens && tokens[_i + 1].value == "." && tokens[_i + 2].type == kSymbol;
114 };
115 while (dotChainKeepsGoing(i)) {
116 potentialColNames.emplace_back(potentialColNames.back() + "." + tokens[i + 2].value);
117 i += 2; // consume the tokens we looked at
118 }
119
120 // in an expression such as `a.b`, if `a` is a column alias add it to `usedAliases` and
121 // replace the alias with the real column name in `potentialColNames`.
122 const auto maybeAnAlias = potentialColNames[0]; // intentionally a copy as we'll modify potentialColNames later
123 const auto &resolvedAlias = colRegister.ResolveAlias(maybeAnAlias);
124 if (resolvedAlias != maybeAnAlias) { // this is an alias
126 for (auto &s : potentialColNames)
127 s.replace(0, maybeAnAlias.size(), resolvedAlias);
128 }
129
130 // find the longest potential column name that is an actual column name
131 // (potential columns are sorted by length, so we search from the end to find the longest)
132 auto isRDFColumn = [&](const std::string &col) {
133 if (colRegister.IsDefineOrAlias(col) || IsStrInVec(col, dataSourceColNames))
134 return true;
135 return false;
136 };
137 const auto longestRDFColMatch = std::find_if(potentialColNames.crbegin(), potentialColNames.crend(), isRDFColumn);
140 }
141
142 return {{usedCols.begin(), usedCols.end()}, {usedAliases.begin(), usedAliases.end()}};
143}
144
/// Substitute each '.' in a string with '\.'
/// \param[in] s The input string, typically a column name that will be embedded in a regular expression.
/// \return A copy of `s` in which every dot is preceded by a backslash; all other characters are unchanged.
///
/// Implemented as a single linear pass rather than a regex substitution: no pattern has to be compiled
/// on every call just to escape one literal character.
std::string EscapeDots(const std::string &s)
{
   std::string escaped;
   escaped.reserve(s.size());
   for (const char c : s) {
      if (c == '.')
         escaped += '\\';
      escaped += c;
   }
   return escaped;
}
153
154TString ResolveAliases(const TString &expr, const ColumnNames_t &usedAliases,
156{
157 TString out(expr);
158
159 for (const auto &alias : usedAliases) {
160 const auto &col = colRegister.ResolveAlias(alias);
161 TPRegexp replacer("\\b" + EscapeDots(alias) + "\\b");
162 replacer.Substitute(out, col.data(), "g");
163 }
164
165 return out;
166}
167
168ParsedExpression ParseRDFExpression(std::string_view expr, const ROOT::Internal::RDF::RColumnRegister &colRegister,
169 const ColumnNames_t &dataSourceColNames)
170{
171 // transform `#var` into `R_rdf_sizeof_var`
173 // match #varname at beginning of the sentence or after not-a-word, but exclude preprocessor directives like #ifdef
175 "(^|\\W)#(?!(ifdef|ifndef|if|else|elif|endif|pragma|define|undef|include|line))([a-zA-Z_][a-zA-Z0-9_]*)");
176 colSizeReplacer.Substitute(preProcessedExpr, "$1R_rdf_sizeof_$3", "g");
177
178 ColumnNames_t usedCols;
179 ColumnNames_t usedAliases;
180 std::tie(usedCols, usedAliases) =
182
184
   // when we are done, exprWithVars will be the same as preProcessedExpr but column names will be substituted with
   // the dummy variable names in varNames
188
189 ColumnNames_t varNames(usedCols.size());
190 for (auto i = 0u; i < varNames.size(); ++i)
191 varNames[i] = "var" + std::to_string(i);
192
193 // sort the vector usedColsAndAliases by decreasing length of its elements,
194 // so in case of friends we guarantee we never substitute a column name with another column containing it
195 // ex. without sorting when passing "x" and "fr.x", the replacer would output "var0" and "fr.var0",
196 // because it has already substituted "x", hence the "x" in "fr.x" would be recognized as "var0",
197 // whereas the desired behaviour is handling them as "var0" and "var1"
198 std::sort(usedCols.begin(), usedCols.end(),
199 [](const std::string &a, const std::string &b) { return a.size() > b.size(); });
200 for (const auto &col : usedCols) {
201 const auto varIdx = std::distance(usedCols.begin(), std::find(usedCols.begin(), usedCols.end(), col));
202 TPRegexp replacer("\\b" + EscapeDots(col) + "\\b");
203 replacer.Substitute(exprWithVars, varNames[varIdx], "g");
204 }
205
206 return ParsedExpression{std::string(std::move(exprWithVars)), std::move(usedCols), std::move(varNames)};
207}
208
/// Access the process-wide cache of Filter/Define functions that have already been jitted.
/// The cache lets callers find out whether an expression was jitted before and, if so,
/// retrieve the name of the function that was generated for it.
/// Each key is the body of an expression and each value is the name of the corresponding
/// jitted function. For example, for:
/// auto f1(){ return 42; }
/// the key would be "(){ return 42; }" and the value would be "f1".
/// \return A mutable reference to the single, function-local static map.
std::unordered_map<std::string, std::string> &GetJittedExprs()
{
   using ExprToFuncName_t = std::unordered_map<std::string, std::string>;
   static ExprToFuncName_t sExprToFuncName;
   return sExprToFuncName;
}
220
221std::string
222BuildFunctionString(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
223{
224 assert(vars.size() == varTypes.size());
225
226 TPRegexp re(R"(\breturn\b)");
227 const bool hasReturnStmt = re.MatchB(expr);
228
229 static const std::vector<std::string> fundamentalTypes = {
230 "int",
231 "signed",
232 "signed int",
233 "Int_t",
234 "unsigned",
235 "unsigned int",
236 "UInt_t",
237 "double",
238 "Double_t",
239 "float",
240 "Float_t",
241 "char",
242 "Char_t",
243 "unsigned char",
244 "UChar_t",
245 "bool",
246 "Bool_t",
247 "short",
248 "short int",
249 "Short_t",
250 "long",
251 "long int",
252 "long long int",
253 "Long64_t",
254 "unsigned long",
255 "unsigned long int",
256 "ULong64_t",
257 "std::size_t",
258 "size_t",
259 "Ssiz_t"
260 };
261
262 std::stringstream ss;
263 ss << "(";
264 for (auto i = 0u; i < vars.size(); ++i) {
265 std::string fullType;
266 const auto &type = varTypes[i];
268 // pass it by const value to help detect common mistakes such as if(x = 3)
269 fullType = "const " + type + " ";
270 } else {
271 // We pass by reference to avoid expensive copies
272 // It can't be const reference in general, as users might want/need to call non-const methods on the values
273 fullType = type + "& ";
274 }
275 ss << fullType << vars[i] << ", ";
276 }
277 if (!vars.empty())
278 ss.seekp(-2, ss.cur);
279
280 if (hasReturnStmt)
281 ss << "){";
282 else
283 ss << "){return ";
284 ss << expr << "\n;}";
285
286 return ss.str();
287}
288
289/// Declare a function to the interpreter in namespace R_rdf, return the name of the jitted function.
290/// If the function is already in GetJittedExprs, return the name for the function that has already been jitted.
291std::string DeclareFunction(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
292{
294
295 const auto funcCode = BuildFunctionString(expr, vars, varTypes);
296 auto &exprMap = GetJittedExprs();
297 const auto exprIt = exprMap.find(funcCode);
298 if (exprIt != exprMap.end()) {
299 // expression already there
300 const auto funcName = exprIt->second;
301 return funcName;
302 }
303
304 // new expression
305 const auto funcBaseName = "func" + std::to_string(exprMap.size());
306 const auto funcFullName = "R_rdf::" + funcBaseName;
307
308 const auto toDeclare = "namespace R_rdf {\nauto " + funcBaseName + funcCode + "\nusing " + funcBaseName +
309 "_ret_t = typename ROOT::TypeTraits::CallableTraits<decltype(" + funcBaseName +
310 ")>::ret_type;\n}";
312
313 // InterpreterDeclare could throw. If it doesn't, mark the function as already jitted
314 exprMap.insert({funcCode, funcFullName});
315
316 return funcFullName;
317}
318
319/// Each jitted function comes with a func_ret_t type alias for its return type.
320/// Resolve that alias and return the true type as string.
321std::string RetTypeOfFunc(const std::string &funcName)
322{
323 const auto dt = gROOT->GetType((funcName + "_ret_t").c_str());
324 R__ASSERT(dt != nullptr);
325 const auto type = dt->GetFullTypeName();
326 return type;
327}
328
329[[noreturn]] void
330ThrowJitBuildActionHelperTypeError(const std::string &actionTypeNameBase, const std::type_info &helperArgType)
331{
332 int err = 0;
334 std::string actionHelperTypeName = cname;
335 delete[] cname;
336 if (err != 0)
338
339 std::string exceptionText =
340 "RDataFrame::Jit: cannot just-in-time compile a \"" + actionTypeNameBase + "\" action using helper type \"" +
342 "\". This typically happens in a custom `Fill` or `Book` invocation where the types of the input columns have "
343 "not been specified as template parameters and the ROOT interpreter has no knowledge of this type of action "
344 "helper. Please add template parameters for the types of the input columns to avoid jitting this action (i.e. "
345 "`df.Fill<float>(..., {\"x\"})`, where `float` is the type of `x`) or declare the action helper type to the "
346 "interpreter, e.g. via gInterpreter->Declare.";
347
348 throw std::runtime_error(exceptionText);
349}
350
351} // anonymous namespace
352
353namespace ROOT {
354namespace Internal {
355namespace RDF {
356
357/// Take a list of column names, return that list with entries starting by '#' filtered out.
358/// The function throws when filtering out a column this way.
360{
363 std::copy_if(columnNames.begin(), columnNames.end(), std::back_inserter(columnListWithoutSizeColumns),
364 [&](const std::string &name) {
365 if (name[0] == '#') {
366 filteredColumns.emplace_back(name);
367 return false;
368 } else {
369 return true;
370 }
371 });
372
373 if (!filteredColumns.empty()) {
374 std::string msg = "Column name(s) {";
375 for (auto &c : filteredColumns)
376 msg += c + ", ";
377 msg[msg.size() - 2] = '}';
378 msg += "will be ignored. Please go through a valid Alias to " + action + " an array size column";
379 throw std::runtime_error(msg);
380 }
381
383}
384
/// Translate a column name that might be an alias into the name of the column it refers to.
/// \param[in] col The column name or alias to resolve.
/// \param[in] aliasMap Map from alias name to the name of the aliased column.
/// \return The mapped name if `col` is a key of `aliasMap`; otherwise `R_rdf_sizeof_var` if `col` has the
///         form `#var`; otherwise `col` itself.
std::string ResolveAlias(const std::string &col, const std::map<std::string, std::string> &aliasMap)
{
   // explicit aliases take precedence over the implicit `#var` one
   if (const auto aliasIt = aliasMap.find(col); aliasIt != aliasMap.end())
      return aliasIt->second;

   // #var is an alias for R_rdf_sizeof_var (a lone '#' is left untouched)
   const bool isSizeColumn = col.size() > 1 && col.front() == '#';
   return isSizeColumn ? "R_rdf_sizeof_" + col.substr(1) : col;
}
397
/// Check that `var` is a valid C++ variable name, throw otherwise.
/// \param[in] var The candidate name.
/// \param[in] where Name of the calling operation, used to build the error message. When it equals "Define"
///                  the offending object is reported as a "column", otherwise as a "variation".
/// \throws std::runtime_error if `var` is empty, if its first character is neither an ASCII letter nor an
///         underscore, or if it contains any character other than ASCII letters, digits and underscores.
void CheckValidCppVarName(std::string_view var, const std::string &where)
{
   const auto isALetter = [](char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); };
   const auto isANumber = [](char c) { return c >= '0' && c <= '9'; };
   // all characters must be either a letter, an underscore or a number
   const auto isValidTok = [&](char c) { return c == '_' || isALetter(c) || isANumber(c); };

   // The emptiness check must come first: reading the first character of an empty string_view is
   // undefined behaviour. The first character must be either a letter or an underscore.
   const bool isValid = !var.empty() && (var.front() == '_' || isALetter(var.front())) &&
                        std::all_of(var.begin(), var.end(), isValidTok);
   if (isValid)
      return;

   const auto objName = where == "Define" ? "column" : "variation";
   const auto error = "RDataFrame::" + where + ": cannot define " + objName + " \"" + std::string(var) +
                      "\". Not a valid C++ variable name.";
   throw std::runtime_error(error);
}
426
427std::string DemangleTypeIdName(const std::type_info &typeInfo)
428{
429 int dummy(0);
431 std::string tname(tn);
432 free(tn);
433 return tname;
434}
435
436ColumnNames_t
437ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
438{
439 const auto theRegexSize = columnNameRegexp.size();
440 std::string theRegex(columnNameRegexp);
441
442 const auto isEmptyRegex = 0 == theRegexSize;
443 // This is to avoid cases where branches called b1, b2, b3 are all matched by expression "b"
444 if (theRegexSize > 0 && theRegex[0] != '^')
445 theRegex = "^" + theRegex;
446 if (theRegexSize > 0 && theRegex[theRegexSize - 1] != '$')
447 theRegex = theRegex + "$";
448
450
451 // Since we support gcc48 and it does not provide in its stl std::regex,
452 // we need to use TPRegexp
453 TPRegexp regexp(theRegex);
454 for (auto &&colName : colNames) {
455 if ((isEmptyRegex || regexp.MatchB(colName.c_str())) && !IsInternalColumn(colName)) {
456 selectedColumns.emplace_back(colName);
457 }
458 }
459
460 if (selectedColumns.empty()) {
461 std::string text(callerName);
462 if (columnNameRegexp.empty()) {
463 text = ": there is no column available to match.";
464 } else {
465 text = ": regex \"" + std::string(columnNameRegexp) + "\" did not match any column.";
466 }
467 throw std::runtime_error(text);
468 }
469 return selectedColumns;
470}
471
472/// Throw if column `definedColView` is already there.
473void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister,
475{
476
477 std::string error{};
478 if (colRegister.IsAlias(definedColView))
479 error = "An alias with that name, pointing to column \"" + std::string(colRegister.ResolveAlias(definedColView)) +
480 "\", already exists in this branch of the computation graph.";
481 else if (colRegister.IsDefineOrAlias(definedColView))
482 error = "A column with that name has already been Define'd. Use Redefine to force redefinition.";
484 error =
485 "A column with that name is already present in the input data source. Use Redefine to force redefinition.";
486
487 if (!error.empty()) {
488 error = "RDataFrame::" + where + ": cannot define column \"" + std::string(definedColView) + "\". " + error;
489 throw std::runtime_error(error);
490 }
491}
492
493/// Throw if column `definedColView` is _not_ already there.
494void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister,
496{
497 std::string error{};
498
499 if (colRegister.IsAlias(definedColView)) {
500 error = "An alias with that name, pointing to column \"" + std::string(colRegister.ResolveAlias(definedColView)) +
501 "\", already exists. Aliases cannot be Redefined or Varied.";
502 }
503
504 if (error.empty()) {
505 const bool isAlreadyDefined = colRegister.IsDefineOrAlias(definedColView);
506 const bool isADSColumn =
508
510 error = "No column with that name was found in the dataset. Use Define to create a new column.";
511 }
512
513 if (!error.empty()) {
514 if (where == "DefaultValueFor")
515 error = "RDataFrame::" + where + ": cannot provide default values for column \"" +
516 std::string(definedColView) + "\". " + error;
517 else
518 error = "RDataFrame::" + where + ": cannot redefine or vary column \"" + std::string(definedColView) + "\". " +
519 error;
520 throw std::runtime_error(error);
521 }
522}
523
524/// Throw if the column has systematic variations attached.
525void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
526{
527 const std::string definedCol(definedColView);
528 const auto &variationDeps = colRegister.GetVariationDeps(definedCol);
529 if (!variationDeps.empty()) {
530 if (where == "Redefine") {
531 const std::string error = "RDataFrame::" + where + ": cannot redefine column \"" + definedCol +
532 "\". The column depends on one or more systematic variations and re-defining varied "
533 "columns is not supported.";
534 throw std::runtime_error(error);
535 } else if (where == "DefaultValueFor") {
536 const std::string error = "RDataFrame::" + where + ": cannot provide a default value for column \"" +
537 definedCol +
538 "\". The column depends on one or more systematic variations and it should not be "
539 "possible to have missing values in varied columns.";
540 throw std::runtime_error(error);
541 } else {
542 const std::string error =
543 "RDataFrame::" + where + ": this operation cannot work with columns that depend on systematic variations.";
544 throw std::runtime_error(error);
545 }
546 }
547}
548
549void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
550{
552 std::string err_msg = "The number of template parameters specified is ";
553 err_msg += std::to_string(nTemplateParams);
554 err_msg += " while ";
555 err_msg += std::to_string(nColumnNames);
556 err_msg += " columns have been specified.";
557 throw std::runtime_error(err_msg);
558 }
559}
560
561/// Choose between local column names or default column names, throw in case of errors.
562const ColumnNames_t
564{
565 if (names.empty()) {
566 // use default column names
567 if (defaultNames.size() < nRequiredNames)
568 throw std::runtime_error(
569 std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
570 " required but none were provided and the default list has size " + std::to_string(defaultNames.size()));
571 // return first nRequiredNames default column names
573 } else {
574 // use column names provided by the user to this particular transformation/action
575 if (names.size() != nRequiredNames) {
576 auto msg = std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
577 " required but " + std::to_string(names.size()) + (names.size() == 1 ? " was" : " were") +
578 " provided:";
579 for (const auto &name : names)
580 msg += " \"" + name + "\",";
581 msg.back() = '.';
582 throw std::runtime_error(msg);
583 }
584 return names;
585 }
586}
587
590{
592 for (auto &column : requiredCols) {
593 if (definedCols.IsDefineOrAlias(column))
594 continue;
595 const auto isDataSourceColumn =
598 continue;
599 unknownColumns.emplace_back(column);
600 }
601 return unknownColumns;
602}
603
604std::vector<std::string> GetFilterNames(const std::shared_ptr<RLoopManager> &loopManager)
605{
606 return loopManager->GetFiltersNames();
607}
608
610{
611 // split name into directory and treename if needed
612 std::string_view dirName = "";
613 std::string_view treeName = fullTreeName;
614 const auto lastSlash = fullTreeName.rfind('/');
615 if (std::string_view::npos != lastSlash) {
616 dirName = treeName.substr(0, lastSlash);
617 treeName = treeName.substr(lastSlash + 1, treeName.size());
618 }
619 return {std::string(treeName), std::string(dirName)};
620}
621
622std::string PrettyPrintAddr(const void *const addr)
623{
624 std::stringstream s;
625 // Windows-friendly
627 return s.str();
628}
629
630/// Book the jitting of a Filter call
631std::shared_ptr<RDFDetail::RJittedFilter>
632BookFilterJit(std::shared_ptr<RDFDetail::RNodeBase> *prevNodeOnHeap, std::string_view name, std::string_view expression,
634{
635 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
636
637 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
638 const auto exprVarTypes =
639 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, tree, ds, "Filter", /*vector2RVec=*/true);
640 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
641 const auto type = RetTypeOfFunc(funcName);
642 if (type != "bool")
643 std::runtime_error("Filter: the following expression does not evaluate to bool:\n" + std::string(expression));
644
645 // definesOnHeap is deleted by the jitted call to JitFilterHelper
649
650 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(
651 (*prevNodeOnHeap)->GetLoopManagerUnchecked(), name,
652 Union(colRegister.GetVariationDeps(parsedExpr.fUsedCols), (*prevNodeOnHeap)->GetVariations()));
653
654 // Produce code snippet that creates the filter and registers it with the corresponding RJittedFilter
655 // Windows requires std::hex << std::showbase << (size_t)pointer to produce notation "0x1234"
656 std::stringstream filterInvocation;
657 filterInvocation << "ROOT::Internal::RDF::JitFilterHelper(" << funcName << ", new const char*["
658 << parsedExpr.fUsedCols.size() << "]{";
659 for (const auto &col : parsedExpr.fUsedCols)
660 filterInvocation << "\"" << col << "\", ";
661 if (!parsedExpr.fUsedCols.empty())
662 filterInvocation.seekp(-2, filterInvocation.cur); // remove the last ",
663 // lifetime of pointees:
664 // - jittedFilter: heap-allocated weak_ptr to the actual jittedFilter that will be deleted by JitFilterHelper
665 // - prevNodeOnHeap: heap-allocated shared_ptr to the actual previous node that will be deleted by JitFilterHelper
666 // - definesOnHeap: heap-allocated, will be deleted by JitFilterHelper
667 filterInvocation << "}, " << parsedExpr.fUsedCols.size() << ", \"" << name << "\", "
668 << "reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedFilter>*>("
670 << "reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" << prevNodeAddr << "),"
671 << "reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesOnHeapAddr << ")"
672 << ");\n";
673
674 auto lm = jittedFilter->GetLoopManagerUnchecked();
675 lm->ToJitExec(filterInvocation.str());
676
677 return jittedFilter;
678}
679
680/// Book the jitting of a Define call
681std::shared_ptr<RJittedDefine> BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm,
683 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
684{
685 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
686
687 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
688 const auto exprVarTypes =
689 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, nullptr, ds, "Define", /*vector2RVec=*/true);
690 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
691 const auto type = RetTypeOfFunc(funcName);
692
695 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, type, lm, colRegister, parsedExpr.fUsedCols);
696
697 std::stringstream defineInvocation;
698 defineInvocation << "ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>(" << funcName
699 << ", new const char*[" << parsedExpr.fUsedCols.size() << "]{";
700 for (const auto &col : parsedExpr.fUsedCols) {
701 defineInvocation << "\"" << col << "\", ";
702 }
703 if (!parsedExpr.fUsedCols.empty())
704 defineInvocation.seekp(-2, defineInvocation.cur); // remove the last ",
705 // lifetime of pointees:
706 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
707 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
708 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
709 defineInvocation << "}, " << parsedExpr.fUsedCols.size() << ", \"" << name
710 << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
711 << "), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>("
713 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
714 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
715 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
716
717 lm.ToJitExec(defineInvocation.str());
718 return jittedDefine;
719}
720
721/// Book the jitting of a DefinePerSample call
722std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std::string_view expression,
724 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
725{
726 const auto funcName = DeclareFunction(std::string(expression), {"rdfslot_", "rdfsampleinfo_"},
727 {"unsigned int", "const ROOT::RDF::RSampleInfo"});
728 const auto retType = RetTypeOfFunc(funcName);
729
732 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, retType, lm, colRegister, ColumnNames_t{});
733
734 std::stringstream defineInvocation;
735 defineInvocation << "ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>("
736 << funcName << ", nullptr, 0, ";
737 // lifetime of pointees:
738 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
739 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
740 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
741 defineInvocation << "\"" << name << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
742 << "), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>("
744 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
745 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
746 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
747
748 lm.ToJitExec(defineInvocation.str());
749 return jittedDefine;
750}
751
752/// Book the jitting of a Vary call
753std::shared_ptr<RJittedVariation>
754BookVariationJit(const std::vector<std::string> &colNames, std::string_view variationName,
755 const std::vector<std::string> &variationTags, std::string_view expression, RLoopManager &lm,
756 RDataSource *ds, const RColumnRegister &colRegister, std::shared_ptr<RNodeBase> *upcastNodeOnHeap,
757 bool isSingleColumn)
758{
759 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
760
761 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
762 const auto exprVarTypes =
763 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, nullptr, ds, "Vary", /*vector2RVec=*/true);
764 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
765 const auto type = RetTypeOfFunc(funcName);
766
767 if (type.rfind("ROOT::VecOps::RVec", 0) != 0) {
768 // Avoid leak
769 delete upcastNodeOnHeap;
770 upcastNodeOnHeap = nullptr;
771 throw std::runtime_error(
772 "Jitted Vary expressions must return an RVec object. The following expression returns a " + type +
773 " instead:\n" + parsedExpr.fExpr);
774 }
775
778 auto jittedVariation = std::make_shared<RJittedVariation>(colNames, variationName, variationTags, type, colRegister,
779 lm, parsedExpr.fUsedCols);
780
781 // build invocation to JitVariationHelper
782 // arrays of strings are passed as const char** plus size.
783 // lifetime of pointees:
784 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
785 // - jittedVariation: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
786 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
787 std::stringstream varyInvocation;
788 varyInvocation << "ROOT::Internal::RDF::JitVariationHelper<" << (isSingleColumn ? "true" : "false") << ">("
789 << funcName << ", new const char*[" << parsedExpr.fUsedCols.size() << "]{";
790 for (const auto &col : parsedExpr.fUsedCols) {
791 varyInvocation << "\"" << col << "\", ";
792 }
793 if (!parsedExpr.fUsedCols.empty())
794 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
795 varyInvocation << "}, " << parsedExpr.fUsedCols.size();
796 varyInvocation << ", new const char*[" << colNames.size() << "]{";
797 for (const auto &col : colNames) {
798 varyInvocation << "\"" << col << "\", ";
799 }
800 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
801 varyInvocation << "}, " << colNames.size() << ", new const char*[" << variationTags.size() << "]{";
802 for (const auto &tag : variationTags) {
803 varyInvocation << "\"" << tag << "\", ";
804 }
805 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
806 varyInvocation << "}, " << variationTags.size() << ", \"" << variationName
807 << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
808 << "), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedVariation>*>("
810 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << colRegisterAddr
811 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
812 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
813
814 lm.ToJitExec(varyInvocation.str());
815 return jittedVariation;
816}
817
818// Jit and call something equivalent to "this->BuildAndBook<ColTypes...>(params...)"
819// (see comments in the body for actual jitted code)
820std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr<RDFDetail::RNodeBase> *prevNode,
821 const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap,
822 TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds,
823 std::weak_ptr<RJittedAction> *jittedActionOnHeap, const bool vector2RVec)
824{
825 // retrieve type of action as a string
827 if (!actionTypeClass) {
828 std::string exceptionText = "An error occurred while inferring the action type of the operation.";
829 throw std::runtime_error(exceptionText);
830 }
831 const std::string actionTypeName = actionTypeClass->GetName();
832 const std::string actionTypeNameBase = actionTypeName.substr(actionTypeName.rfind(':') + 1);
833
834 // retrieve type of result of the action as a string
836 if (helperArgTypeName.empty()) {
838 }
839
840 auto definesCopy = new RColumnRegister(colRegister); // deleted in jitted CallBuildAction
842
843 // Build a call to CallBuildAction with the appropriate argument. When run through the interpreter, this code will
844 // just-in-time create an RAction object and it will assign it to its corresponding RJittedAction.
845 std::stringstream createAction_str;
846 createAction_str << "ROOT::Internal::RDF::CallBuildAction<" << actionTypeName;
848 for (auto &colType : columnTypeNames)
849 createAction_str << ", " << colType;
850 // on Windows, to prefix the hexadecimal value of a pointer with '0x',
851 // one needs to write: std::hex << std::showbase << (size_t)pointer
852 createAction_str << ">(reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
853 << PrettyPrintAddr(prevNode) << "), new const char*[" << cols.size() << "]{";
854 for (auto i = 0u; i < cols.size(); ++i) {
855 if (i != 0u)
856 createAction_str << ", ";
857 createAction_str << '"' << cols[i] << '"';
858 }
859 createAction_str << "}, " << cols.size() << ", " << nSlots << ", reinterpret_cast<shared_ptr<" << helperArgTypeName
860 << ">*>(" << PrettyPrintAddr(helperArgOnHeap)
861 << "), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedAction>*>("
863 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr << "));";
864 return createAction_str.str();
865}
866
/// Tell whether any of the given string views has zero length.
/// \param[in] strings The string views to inspect.
/// \return true if at least one element of `strings` is empty, false otherwise (an empty vector yields false).
bool AtLeastOneEmptyString(const std::vector<std::string_view> strings)
{
   return std::any_of(strings.cbegin(), strings.cend(),
                      [](std::string_view candidate) { return candidate.empty(); });
}
875
876std::shared_ptr<RNodeBase> UpcastNode(std::shared_ptr<RNodeBase> ptr)
877{
878 return ptr;
879}
880
881/// Given the desired number of columns and the user-provided list of columns:
882/// * fall back to using the first nColumns default columns if needed (or throw if nColumns > nDefaultColumns)
883/// * check that selected column names refer to valid branches, custom columns or datasource columns (throw if not)
884/// * replace column names from aliases by the actual column name
885/// Return the list of selected column names.
888{
889 auto selectedColumns = SelectColumns(nColumns, columns, lm.GetDefaultColumnNames());
890
891 for (auto &col : selectedColumns) {
892 col = colRegister.ResolveAlias(col);
893 }
894
895 // Complain if there are still unknown columns at this point
897
898 if (!unknownColumns.empty()) {
899 // Some columns are still unknown, we need to understand if the error
900 // should be printed or if the user requested to explicitly disable it.
901 // Look for a possible overlap between the unknown columns and the
902 // columns we should ignore for the purpose of the following exception
903 std::set<std::string> intersection;
904 const auto &colsToIgnore = lm.GetSuppressErrorsForMissingBranches();
905 std::sort(unknownColumns.begin(), unknownColumns.end());
906 std::set_intersection(unknownColumns.cbegin(), unknownColumns.cend(), colsToIgnore.cbegin(), colsToIgnore.cend(),
907 std::inserter(intersection, intersection.begin()));
908 if (intersection.empty()) {
909 std::string errMsg = std::string("Unknown column") + (unknownColumns.size() > 1 ? "s: " : ": ");
910 for (auto &unknownColumn : unknownColumns)
911 errMsg += '"' + unknownColumn + "\", ";
912 errMsg.resize(errMsg.size() - 2); // remove last ", "
913 throw std::runtime_error(errMsg);
914 }
915 }
916
917 return selectedColumns;
918}
919
921 TTree *tree, RDataSource *ds, const std::string &context,
922 bool vector2RVec)
923{
924 auto toCheckedArgType = [&](const std::string &c) {
925 RDFDetail::RDefineBase *define = colRegister.GetDefine(c);
926 const auto colType = ColumnName2ColumnTypeName(c, tree, ds, define, vector2RVec);
927 if (colType.rfind("CLING_UNKNOWN_TYPE", 0) == 0) { // the interpreter does not know this type
928 const auto msg =
929 "The type of custom column \"" + c + "\" (" + colType.substr(19) +
930 ") is not known to the interpreter, but a just-in-time-compiled " + context +
931 " call requires this column. Make sure to create and load ROOT dictionaries for this column's class.";
932 throw std::runtime_error(msg);
933 }
934 return colType;
935 };
936 std::vector<std::string> colTypes;
937 colTypes.reserve(colNames.size());
938 std::transform(colNames.begin(), colNames.end(), std::back_inserter(colTypes), toCheckedArgType);
939 return colTypes;
940}
941
942/// Return a vector of booleans, each element of which indicates whether the corresponding element in
943/// `requestedCols` is the name of a column that must be defined via datasource. All elements of the returned
944/// vector are false if no data-source is present.
946{
947 const auto nColumns = requestedCols.size();
948 std::vector<bool> mustBeDefined(nColumns, false);
949 for (auto i = 0u; i < nColumns; ++i)
951 return mustBeDefined;
952}
953
955{
956 std::unordered_set<std::string> uniqueCols;
957 for (auto &col : cols) {
958 if (!uniqueCols.insert(col).second) {
959 const auto msg = "Error: column \"" + col +
960 "\" was passed to Snapshot twice. This is not supported: only one of the columns would be "
961 "readable with RDataFrame.";
962 throw std::logic_error(msg);
963 }
964 }
965}
966
967/// Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array branches added
968/// in the right positions (i.e. before the array branches that need them).
969std::pair<std::vector<std::string>, std::vector<std::string>>
971 std::vector<std::string> &&colsWithAliases)
972{
973 TTree *tree{};
974 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(ds))
975 tree = treeDS->GetTree();
976 if (!tree) // nothing to do
977 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
978
979 assert(colsWithoutAliases.size() == colsWithAliases.size());
980
981 auto nCols = colsWithoutAliases.size();
982 // Use index-iteration as we modify the vector during the iteration.
983 for (std::size_t i = 0u; i < nCols; ++i) {
984 const auto &colName = colsWithoutAliases[i];
985
986 auto *b = tree->GetBranch(colName.c_str());
987 if (!b) // try harder
988 b = tree->FindBranch(colName.c_str());
989
990 if (!b)
991 continue;
992
993 auto *leaves = b->GetListOfLeaves();
994 if (b->IsA() != TBranch::Class() || leaves->GetEntries() != 1)
995 continue; // this branch is not a variable-sized array, nothing to do
996
997 TLeaf *countLeaf = static_cast<TLeaf *>(leaves->At(0))->GetLeafCount();
998 if (!countLeaf || IsStrInVec(countLeaf->GetName(), colsWithoutAliases))
999 continue; // not a variable-sized array or the size branch is already there, nothing to do
1000
1001 // otherwise we must insert the size in colsWithoutAliases _and_ colsWithAliases
1002 colsWithoutAliases.insert(colsWithoutAliases.begin() + i, countLeaf->GetName());
1003 colsWithAliases.insert(colsWithAliases.begin() + i, countLeaf->GetName());
1004 ++nCols;
1005 ++i; // as we inserted an element in the vector we iterate over, we need to move the index forward one extra time
1006 }
1007
1008 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
1009}
1010
1012{
1013 std::set<std::string> uniqueCols;
1014 columnNames.erase(
1015 std::remove_if(columnNames.begin(), columnNames.end(),
1016 [&uniqueCols](const std::string &colName) { return !uniqueCols.insert(colName).second; }),
1017 columnNames.end());
1018}
1019
1021{
1023
1024 std::copy_if(columnNames.cbegin(), columnNames.cend(), std::back_inserter(parentFields),
1025 [](const std::string &colName) { return colName.find('.') == std::string::npos; });
1026
1027 columnNames.erase(std::remove_if(columnNames.begin(), columnNames.end(),
1028 [&parentFields](const std::string &colName) {
1029 if (colName.find('.') == std::string::npos)
1030 return false;
1031 const auto parentFieldName = colName.substr(0, colName.find_first_of('.'));
1032 return std::find(parentFields.cbegin(), parentFields.cend(), parentFieldName) !=
1033 parentFields.end();
1034 }),
1035 columnNames.end());
1036}
1037} // namespace RDF
1038} // namespace Internal
1039} // namespace ROOT
1040
1041namespace {
1042void AddDataSourceColumn(const std::string &colName, const std::type_info &typeID, ROOT::Detail::RDF::RLoopManager &lm,
1044{
1045
1046 if (colRegister.IsDefineOrAlias(colName))
1047 return;
1048
1049 if (lm.HasDataSourceColumnReaders(colName, typeID))
1050 return;
1051
1052 if (!ds.HasColumn(colName) &&
1053 lm.GetSuppressErrorsForMissingBranches().find(colName) == lm.GetSuppressErrorsForMissingBranches().end())
1054 return;
1055
1056 const auto nSlots = lm.GetNSlots();
1057 std::vector<std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>> colReaders;
1058 colReaders.reserve(nSlots);
1059 // TODO consider changing the interface so we return all of these for all slots in one go
1060 for (auto slot = 0u; slot < nSlots; ++slot)
1061 colReaders.emplace_back(
1062 ROOT::Internal::RDF::CreateColumnReader(ds, slot, colName, typeID, /*treeReader*/ nullptr));
1063
1064 lm.AddDataSourceColumnReaders(colName, std::move(colReaders), typeID);
1065}
1066} // namespace
1067
1068void ROOT::Internal::RDF::AddDSColumns(const std::vector<std::string> &colNames, ROOT::Detail::RDF::RLoopManager &lm,
1070 const std::vector<const std::type_info *> &colTypeIDs,
1072{
1073 auto nCols = colNames.size();
1074 assert(nCols == colTypeIDs.size() && "Must provide exactly one column type for each column to create");
1075 for (decltype(nCols) i{}; i < nCols; i++) {
1077 }
1078}
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char cname
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char text
char name[80]
Definition TGX11.cxx:110
R__EXTERN TVirtualMutex * gROOTMutex
Definition TROOT.h:63
#define gROOT
Definition TROOT.h:411
#define R__LOCKGUARD(mutex)
#define free
Definition civetweb.c:1578
The head node of a RDF computation graph.
A binder for user-defined columns, variations and aliases.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
const_iterator begin() const
const_iterator end() const
static TClass * Class()
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2974
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Definition TPRegexp.h:78
Basic string class.
Definition TString.h:138
A TTree represents a columnar dataset.
Definition TTree.h:89
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
void RemoveDuplicates(ColumnNames_t &columnNames)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:178
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition RDFUtils.cxx:523
std::string ResolveAlias(const std::string &col, const std::map< std::string, std::string > &aliasMap)
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(ROOT::RDF::RDataSource &ds, unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *treeReader)
Definition RDFUtils.cxx:671
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(ROOT::RDF::RDataSource *ds, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:312
void RemoveRNTupleSubFields(ColumnNames_t &columnNames)
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
Definition Utils.hxx:269
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:465
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
Definition RDFUtils.cxx:416
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2RVec)
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap, bool isSingleColumn)
Book the jitting of a Vary call.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
std::vector< bool > FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedCols)
Return a bitset each element of which indicates whether the corresponding element in selectedColumns ...
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap, const bool vector2RVec)
std::vector< std::string > ColumnNames_t
Namespace for new ROOT classes and functions.
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
BVH_ALWAYS_INLINE T dot(const Vec< T, N > &a, const Vec< T, N > &b)
Definition vec.h:98