Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFInterfaceUtils.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include <ROOT/RDataSource.hxx>
12#include <ROOT/RTTreeDS.hxx>
15#include <ROOT/RDF/RDisplay.hxx>
20#include "ROOT/RLogger.hxx"
22#include <ROOT/RDF/Utils.hxx>
23#include <string_view>
24#include <TBranch.h>
25#include <TClass.h>
26#include <TClassEdit.h>
27#include <TDataType.h>
28#include <TError.h>
29#include <TLeaf.h>
30#include <TObjArray.h>
31#include <TPRegexp.h>
32#include <TROOT.h>
33#include <TString.h>
34#include <TTree.h>
35#include <TVirtualMutex.h>
36
37// pragma to disable warnings on Rcpp, which produces
38// a lot of noise when compiling
39#if defined(__GNUC__)
40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Woverloaded-virtual"
42#pragma GCC diagnostic ignored "-Wshadow"
43#endif
44#include "lexertk.hpp"
45#if defined(__GNUC__)
46#pragma GCC diagnostic pop
47#endif
48
49#include <algorithm>
50#include <cassert>
51#include <cstdlib> // for size_t
52#include <iterator> // for back_insert_iterator
53#include <map>
54#include <memory>
55#include <set>
56#include <sstream>
57#include <stdexcept>
58#include <string>
59#include <typeinfo>
60#include <unordered_map>
61#include <unordered_set>
62#include <utility> // for pair
63#include <vector>
64
65namespace ROOT::Detail::RDF {
66class RDefineBase;
67}
68
69namespace {
72
73/// A string expression such as those passed to Filter and Define, digested to a standardized form
74struct ParsedExpression {
75 /// The string expression with the dummy variable names in fVarNames in place of the original column names
76 std::string fExpr;
77 /// The list of valid column names that were used in the original string expression.
78 /// Duplicates are removed and column aliases (created with Alias calls) are resolved.
79 ColumnNames_t fUsedCols;
80 /// The list of variable names used in fExpr, with same ordering and size as fUsedCols
81 ColumnNames_t fVarNames;
82};
83
84/// Look at expression `expr` and return a pair of (column names used, aliases used)
85std::pair<ColumnNames_t, ColumnNames_t> FindUsedColsAndAliases(const std::string &expr,
87 const ColumnNames_t &dataSourceColNames)
88{
89 lexertk::generator tokens;
90 const auto tokensOk = tokens.process(expr);
91 if (!tokensOk) {
92 const auto msg = "Failed to tokenize expression:\n" + expr + "\n\nMake sure it is valid C++.";
93 throw std::runtime_error(msg);
94 }
95
96 std::unordered_set<std::string> usedCols;
97 std::unordered_set<std::string> usedAliases;
98
99 // iterate over tokens in expression and fill usedCols and usedAliases
100 const auto nTokens = tokens.size();
101 const auto kSymbol = lexertk::token::e_symbol;
102 for (auto i = 0u; i < nTokens; ++i) {
103 const auto &tok = tokens[i];
104 // lexertk classifies '&' as e_symbol for some reason
105 if (tok.type != kSymbol || tok.value == "&" || tok.value == "|") {
106 // token is not a potential variable name, skip it
107 continue;
108 }
109
110 ColumnNames_t potentialColNames({tok.value});
111
112 // if token is the start of a dot chain (a.b.c...), a.b, a.b.c etc. are also potential column names
113 auto dotChainKeepsGoing = [&](unsigned int _i) {
114 return _i + 2 <= nTokens && tokens[_i + 1].value == "." && tokens[_i + 2].type == kSymbol;
115 };
116 while (dotChainKeepsGoing(i)) {
117 potentialColNames.emplace_back(potentialColNames.back() + "." + tokens[i + 2].value);
118 i += 2; // consume the tokens we looked at
119 }
120
121 // in an expression such as `a.b`, if `a` is a column alias add it to `usedAliases` and
122 // replace the alias with the real column name in `potentialColNames`.
123 const auto maybeAnAlias = potentialColNames[0]; // intentionally a copy as we'll modify potentialColNames later
124 const auto &resolvedAlias = colRegister.ResolveAlias(maybeAnAlias);
125 if (resolvedAlias != maybeAnAlias) { // this is an alias
127 for (auto &s : potentialColNames)
128 s.replace(0, maybeAnAlias.size(), resolvedAlias);
129 }
130
131 // find the longest potential column name that is an actual column name
132 // (potential columns are sorted by length, so we search from the end to find the longest)
133 auto isRDFColumn = [&](const std::string &col) {
134 if (colRegister.IsDefineOrAlias(col) || IsStrInVec(col, dataSourceColNames))
135 return true;
136 return false;
137 };
138 const auto longestRDFColMatch = std::find_if(potentialColNames.crbegin(), potentialColNames.crend(), isRDFColumn);
141 }
142
143 return {{usedCols.begin(), usedCols.end()}, {usedAliases.begin(), usedAliases.end()}};
144}
145
146/// Substitute each '.' in a string with '\.'
147std::string EscapeDots(const std::string &s)
148{
149 TString out(s);
150 TPRegexp dot("\\.");
151 dot.Substitute(out, "\\.", "g");
152 return std::string(std::move(out));
153}
154
155TString ResolveAliases(const TString &expr, const ColumnNames_t &usedAliases,
157{
158 TString out(expr);
159
160 for (const auto &alias : usedAliases) {
161 const auto &col = colRegister.ResolveAlias(alias);
162 TPRegexp replacer("\\b" + EscapeDots(alias) + "\\b");
163 replacer.Substitute(out, col.data(), "g");
164 }
165
166 return out;
167}
168
169ParsedExpression ParseRDFExpression(std::string_view expr, const ROOT::Internal::RDF::RColumnRegister &colRegister,
170 const ColumnNames_t &dataSourceColNames)
171{
172 // transform `#var` into `R_rdf_sizeof_var`
174 // match #varname at beginning of the sentence or after not-a-word, but exclude preprocessor directives like #ifdef
176 "(^|\\W)#(?!(ifdef|ifndef|if|else|elif|endif|pragma|define|undef|include|line))([a-zA-Z_][a-zA-Z0-9_]*)");
177 colSizeReplacer.Substitute(preProcessedExpr, "$1R_rdf_sizeof_$3", "g");
178
179 ColumnNames_t usedCols;
180 ColumnNames_t usedAliases;
181 std::tie(usedCols, usedAliases) =
183
185
186 // when we are done, exprWithVars will be the same as preProcessedExpr but column names will be substituted with
187 // the dummy variable names in varNames
189
190 ColumnNames_t varNames(usedCols.size());
191 for (auto i = 0u; i < varNames.size(); ++i)
192 varNames[i] = "var" + std::to_string(i);
193
194 // sort the vector usedCols by decreasing length of its elements,
195 // so in case of friends we guarantee we never substitute a column name with another column containing it
196 // ex. without sorting when passing "x" and "fr.x", the replacer would output "var0" and "fr.var0",
197 // because it has already substituted "x", hence the "x" in "fr.x" would be recognized as "var0",
198 // whereas the desired behaviour is handling them as "var0" and "var1"
199 std::sort(usedCols.begin(), usedCols.end(),
200 [](const std::string &a, const std::string &b) { return a.size() > b.size(); });
201 for (const auto &col : usedCols) {
202 const auto varIdx = std::distance(usedCols.begin(), std::find(usedCols.begin(), usedCols.end(), col));
203 TPRegexp replacer("\\b" + EscapeDots(col) + "\\b");
204 replacer.Substitute(exprWithVars, varNames[varIdx], "g");
205 }
206
207 return ParsedExpression{std::string(std::move(exprWithVars)), std::move(usedCols), std::move(varNames)};
208}
209
/// Access the process-wide registry of Filter/Define functions that have been jitted.
/// The registry is consulted to find out whether an expression was already jitted and,
/// if so, under which name.
/// Each key is the body of an expression, each value the name of the jitted entity
/// generated for it. For example, for:
/// auto f1(){ return 42; }
/// the key would be "(){ return 42; }" and the value would be "f1".
std::unordered_map<std::string, std::string> &GetJittedExprs()
{
   using ExprToName_t = std::unordered_map<std::string, std::string>;
   // function-local static: the same map instance is returned on every call
   static ExprToName_t sJittedExprRegistry;
   return sJittedExprRegistry;
}
221
222std::string
223BuildFunctionString(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
224{
225 assert(vars.size() == varTypes.size());
226
227 TPRegexp re(R"(\breturn\b)");
228 const bool hasReturnStmt = re.MatchB(expr);
229
230 static const std::vector<std::string> fundamentalTypes = {
231 "int",
232 "signed",
233 "signed int",
234 "Int_t",
235 "unsigned",
236 "unsigned int",
237 "UInt_t",
238 "double",
239 "Double_t",
240 "float",
241 "Float_t",
242 "char",
243 "Char_t",
244 "unsigned char",
245 "UChar_t",
246 "bool",
247 "Bool_t",
248 "short",
249 "short int",
250 "Short_t",
251 "long",
252 "long int",
253 "long long int",
254 "Long64_t",
255 "unsigned long",
256 "unsigned long int",
257 "ULong64_t",
258 "std::size_t",
259 "size_t",
260 "Ssiz_t"
261 };
262
263 std::stringstream ss;
264 ss << "(";
265 for (auto i = 0u; i < vars.size(); ++i) {
266 std::string fullType;
267 const auto &type = varTypes[i];
269 // pass it by const value to help detect common mistakes such as if(x = 3)
270 fullType = "const " + type + " ";
271 } else {
272 // We pass by reference to avoid expensive copies
273 // It can't be const reference in general, as users might want/need to call non-const methods on the values
274 fullType = type + "& ";
275 }
276 ss << fullType << vars[i] << ", ";
277 }
278 if (!vars.empty())
279 ss.seekp(-2, ss.cur);
280
281 if (hasReturnStmt)
282 ss << "){";
283 else
284 ss << "){return ";
285 ss << expr << "\n;}";
286
287 return ss.str();
288}
289
290/// Declare a function to the interpreter in namespace R_rdf, return the name of the jitted function.
291/// If the function is already in GetJittedExprs, return the name for the function that has already been jitted.
292std::string DeclareFunction(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
293{
295
296 const auto funcCode = BuildFunctionString(expr, vars, varTypes);
297 auto &exprMap = GetJittedExprs();
298 const auto exprIt = exprMap.find(funcCode);
299 if (exprIt != exprMap.end()) {
300 // expression already there
301 const auto funcName = exprIt->second;
302 return funcName;
303 }
304
305 // new expression
306 const auto funcBaseName = "func" + std::to_string(exprMap.size());
307 const auto funcFullName = "R_rdf::" + funcBaseName;
308
309 const auto toDeclare = "namespace R_rdf {\nauto " + funcBaseName + funcCode + "\nusing " + funcBaseName +
310 "_ret_t = typename ROOT::TypeTraits::CallableTraits<decltype(" + funcBaseName +
311 ")>::ret_type;\n}";
313
314 // InterpreterDeclare could throw. If it doesn't, mark the function as already jitted
315 exprMap.insert({funcCode, funcFullName});
316
317 return funcFullName;
318}
319
320/// Each jitted function comes with a func_ret_t type alias for its return type.
321/// Resolve that alias and return the true type as string.
322std::string RetTypeOfFunc(const std::string &funcName)
323{
324 const auto dt = gROOT->GetType((funcName + "_ret_t").c_str());
325 R__ASSERT(dt != nullptr);
326 const auto type = dt->GetFullTypeName();
327 return type;
328}
329
330[[noreturn]] void
331ThrowJitBuildActionHelperTypeError(const std::string &actionTypeNameBase, const std::type_info &helperArgType)
332{
333 int err = 0;
335 std::string actionHelperTypeName = cname;
336 delete[] cname;
337 if (err != 0)
339
340 std::string exceptionText =
341 "RDataFrame::Jit: cannot just-in-time compile a \"" + actionTypeNameBase + "\" action using helper type \"" +
343 "\". This typically happens in a custom `Fill` or `Book` invocation where the types of the input columns have "
344 "not been specified as template parameters and the ROOT interpreter has no knowledge of this type of action "
345 "helper. Please add template parameters for the types of the input columns to avoid jitting this action (i.e. "
346 "`df.Fill<float>(..., {\"x\"})`, where `float` is the type of `x`) or declare the action helper type to the "
347 "interpreter, e.g. via gInterpreter->Declare.";
348
349 throw std::runtime_error(exceptionText);
350}
351
352} // anonymous namespace
353
354namespace ROOT {
355namespace Internal {
356namespace RDF {
357
358/// Take a list of column names, return that list with entries starting by '#' filtered out.
359/// The function throws when filtering out a column this way.
361{
364 std::copy_if(columnNames.begin(), columnNames.end(), std::back_inserter(columnListWithoutSizeColumns),
365 [&](const std::string &name) {
366 if (name[0] == '#') {
367 filteredColumns.emplace_back(name);
368 return false;
369 } else {
370 return true;
371 }
372 });
373
374 if (!filteredColumns.empty()) {
375 std::string msg = "Column name(s) {";
376 for (auto &c : filteredColumns)
377 msg += c + ", ";
378 msg[msg.size() - 2] = '}';
379 msg += "will be ignored. Please go through a valid Alias to " + action + " an array size column";
380 throw std::runtime_error(msg);
381 }
382
384}
385
/// Throw if `var` is not usable as a C++ variable name.
///
/// A name is accepted when it is non-empty, starts with an ASCII letter or an underscore, and
/// consists only of ASCII letters, digits and underscores.
///
/// \param[in] var The candidate name.
/// \param[in] where Name of the calling operation; it is used in the error message, and decides
///                  whether the offending entity is reported as a "column" (when equal to "Define")
///                  or as a "variation" (any other value).
/// \throws std::runtime_error if the name is rejected.
void CheckValidCppVarName(std::string_view var, const std::string &where)
{
   auto isALetter = [](char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); };
   auto isANumber = [](char c) { return c >= '0' && c <= '9'; };
   auto isValidTok = [&isALetter, &isANumber](char c) { return c == '_' || isALetter(c) || isANumber(c); };

   // The emptiness check must come first: reading the first character of an empty
   // string_view is undefined behaviour.
   const bool isValid = !var.empty() && (var.front() == '_' || isALetter(var.front())) &&
                        std::all_of(var.begin(), var.end(), isValidTok);
   if (isValid)
      return;

   const std::string objName = where == "Define" ? "column" : "variation";
   throw std::runtime_error("RDataFrame::" + where + ": cannot define " + objName + " \"" + std::string(var) +
                            "\". Not a valid C++ variable name.");
}
414
415std::string DemangleTypeIdName(const std::type_info &typeInfo)
416{
417 int dummy(0);
419 std::string tname(tn);
420 free(tn);
421 return tname;
422}
423
424ColumnNames_t
425ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
426{
427 const auto theRegexSize = columnNameRegexp.size();
428 std::string theRegex(columnNameRegexp);
429
430 const auto isEmptyRegex = 0 == theRegexSize;
431 // This is to avoid cases where branches called b1, b2, b3 are all matched by expression "b"
432 if (theRegexSize > 0 && theRegex[0] != '^')
433 theRegex = "^" + theRegex;
434 if (theRegexSize > 0 && theRegex[theRegexSize - 1] != '$')
435 theRegex = theRegex + "$";
436
438
439 // Since we support gcc48 and it does not provide in its stl std::regex,
440 // we need to use TPRegexp
441 TPRegexp regexp(theRegex);
442 for (auto &&colName : colNames) {
443 if ((isEmptyRegex || regexp.MatchB(colName.c_str())) && !IsInternalColumn(colName)) {
444 selectedColumns.emplace_back(colName);
445 }
446 }
447
448 if (selectedColumns.empty()) {
449 std::string text(callerName);
450 if (columnNameRegexp.empty()) {
451 text = ": there is no column available to match.";
452 } else {
453 text = ": regex \"" + std::string(columnNameRegexp) + "\" did not match any column.";
454 }
455 throw std::runtime_error(text);
456 }
457 return selectedColumns;
458}
459
460/// Throw if column `definedColView` is already there.
461void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister,
463{
464
465 std::string error{};
466 if (colRegister.IsAlias(definedColView))
467 error = "An alias with that name, pointing to column \"" + std::string(colRegister.ResolveAlias(definedColView)) +
468 "\", already exists in this branch of the computation graph.";
469 else if (colRegister.IsDefineOrAlias(definedColView))
470 error = "A column with that name has already been Define'd. Use Redefine to force redefinition.";
472 error =
473 "A column with that name is already present in the input data source. Use Redefine to force redefinition.";
474
475 if (!error.empty()) {
476 error = "RDataFrame::" + where + ": cannot define column \"" + std::string(definedColView) + "\". " + error;
477 throw std::runtime_error(error);
478 }
479}
480
481/// Throw if column `definedColView` is _not_ already there.
482void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister,
484{
485 std::string error{};
486
487 if (colRegister.IsAlias(definedColView)) {
488 error = "An alias with that name, pointing to column \"" + std::string(colRegister.ResolveAlias(definedColView)) +
489 "\", already exists. Aliases cannot be Redefined or Varied.";
490 }
491
492 if (error.empty()) {
493 const bool isAlreadyDefined = colRegister.IsDefineOrAlias(definedColView);
494 const bool isADSColumn =
496
498 error = "No column with that name was found in the dataset. Use Define to create a new column.";
499 }
500
501 if (!error.empty()) {
502 if (where == "DefaultValueFor")
503 error = "RDataFrame::" + where + ": cannot provide default values for column \"" +
504 std::string(definedColView) + "\". " + error;
505 else
506 error = "RDataFrame::" + where + ": cannot redefine or vary column \"" + std::string(definedColView) + "\". " +
507 error;
508 throw std::runtime_error(error);
509 }
510}
511
512/// Throw if the column has systematic variations attached.
513void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
514{
515 const std::string definedCol(definedColView);
516 const auto &variationDeps = colRegister.GetVariationDeps(definedCol);
517 if (!variationDeps.empty()) {
518 if (where == "Redefine") {
519 const std::string error = "RDataFrame::" + where + ": cannot redefine column \"" + definedCol +
520 "\". The column depends on one or more systematic variations and re-defining varied "
521 "columns is not supported.";
522 throw std::runtime_error(error);
523 } else if (where == "DefaultValueFor") {
524 const std::string error = "RDataFrame::" + where + ": cannot provide a default value for column \"" +
525 definedCol +
526 "\". The column depends on one or more systematic variations and it should not be "
527 "possible to have missing values in varied columns.";
528 throw std::runtime_error(error);
529 } else {
530 const std::string error =
531 "RDataFrame::" + where + ": this operation cannot work with columns that depend on systematic variations.";
532 throw std::runtime_error(error);
533 }
534 }
535}
536
537void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
538{
540 std::string err_msg = "The number of template parameters specified is ";
541 err_msg += std::to_string(nTemplateParams);
542 err_msg += " while ";
543 err_msg += std::to_string(nColumnNames);
544 err_msg += " columns have been specified.";
545 throw std::runtime_error(err_msg);
546 }
547}
548
549/// Choose between local column names or default column names, throw in case of errors.
550const ColumnNames_t
552{
553 if (names.empty()) {
554 // use default column names
555 if (defaultNames.size() < nRequiredNames)
556 throw std::runtime_error(
557 std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
558 " required but none were provided and the default list has size " + std::to_string(defaultNames.size()));
559 // return first nRequiredNames default column names
561 } else {
562 // use column names provided by the user to this particular transformation/action
563 if (names.size() != nRequiredNames) {
564 auto msg = std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
565 " required but " + std::to_string(names.size()) + (names.size() == 1 ? " was" : " were") +
566 " provided:";
567 for (const auto &name : names)
568 msg += " \"" + name + "\",";
569 msg.back() = '.';
570 throw std::runtime_error(msg);
571 }
572 return names;
573 }
574}
575
578{
580 for (auto &column : requiredCols) {
581 if (definedCols.IsDefineOrAlias(column))
582 continue;
583 const auto isDataSourceColumn =
586 continue;
587 unknownColumns.emplace_back(column);
588 }
589 return unknownColumns;
590}
591
592std::vector<std::string> GetFilterNames(const std::shared_ptr<RLoopManager> &loopManager)
593{
594 return loopManager->GetFiltersNames();
595}
596
598{
599 // split name into directory and treename if needed
600 std::string_view dirName = "";
601 std::string_view treeName = fullTreeName;
602 const auto lastSlash = fullTreeName.rfind('/');
603 if (std::string_view::npos != lastSlash) {
604 dirName = treeName.substr(0, lastSlash);
605 treeName = treeName.substr(lastSlash + 1, treeName.size());
606 }
607 return {std::string(treeName), std::string(dirName)};
608}
609
610std::string PrettyPrintAddr(const void *const addr)
611{
612 std::stringstream s;
613 // Windows-friendly
615 return s.str();
616}
617
618/// Book the jitting of a Filter call
619std::shared_ptr<RDFDetail::RJittedFilter>
620BookFilterJit(std::shared_ptr<RDFDetail::RNodeBase> prevNode, std::string_view name, std::string_view expression,
622{
623 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
624
625 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
626 const auto exprVarTypes =
627 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, tree, ds, "Filter", /*vector2RVec=*/true);
628 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
629 const auto type = RetTypeOfFunc(funcName);
630 if (type != "bool")
631 std::runtime_error("Filter: the following expression does not evaluate to bool:\n" + std::string(expression));
632
633 auto *lm = prevNode->GetLoopManagerUnchecked();
634 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(
635 lm, name, Union(colRegister.GetVariationDeps(parsedExpr.fUsedCols), prevNode->GetVariations()), prevNode);
636
637 // Produce code snippet that creates the filter and registers it with the corresponding RJittedFilter
638 std::stringstream filterInvocation;
639 filterInvocation << "(const std::vector<std::string> &colNames, "
640 << "ROOT::Internal::RDF::RColumnRegister &colRegister, "
641 << "ROOT::Detail::RDF::RLoopManager &lm, "
642 << "void *jittedFilter, "
643 << "std::shared_ptr<void> *) {\n";
644 filterInvocation << " ROOT::Internal::RDF::JitFilterHelper(" << funcName << ", "
645 << "colNames, "
646 << "colRegister, "
647 << "lm, "
648 << "reinterpret_cast<ROOT::Detail::RDF::RJittedFilter*>(jittedFilter)"
649 << ");\n}\n";
650 lm->RegisterJitHelperCall(filterInvocation.str(),
651 std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister), parsedExpr.fUsedCols,
653
654 return jittedFilter;
655}
656
657/// Book the jitting of a Define call
658std::shared_ptr<RJittedDefine> BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm,
660{
661 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
662
663 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
664 const auto exprVarTypes =
665 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, nullptr, ds, "Define", /*vector2RVec=*/true);
666 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
667 const auto type = RetTypeOfFunc(funcName);
668
669 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, type, lm, colRegister, parsedExpr.fUsedCols);
670
671 // lifetime of pointees:
672 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
673 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
674 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
675 std::stringstream defineInvocation;
676 defineInvocation << "(const std::vector<std::string> &colNames, "
677 << "ROOT::Internal::RDF::RColumnRegister &colRegister, "
678 << "ROOT::Detail::RDF::RLoopManager &lm, "
679 << "void *jittedDefine, "
680 << "std::shared_ptr<void> *) {\n";
681 defineInvocation << " ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>("
682 << funcName << ", "
683 << "colNames, "
684 << "colRegister, "
685 << "lm, "
686 << "reinterpret_cast<ROOT::Detail::RDF::RJittedDefine *>(jittedDefine)"
687 << ");\n}\n";
688 lm.RegisterJitHelperCall(defineInvocation.str(), std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister),
689 parsedExpr.fUsedCols, jittedDefine);
690
691 return jittedDefine;
692}
693
694/// Book the jitting of a DefinePerSample call
695std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std::string_view expression,
697{
698 const auto funcName = DeclareFunction(std::string(expression), {"rdfslot_", "rdfsampleinfo_"},
699 {"unsigned int", "const ROOT::RDF::RSampleInfo"});
700 const auto retType = RetTypeOfFunc(funcName);
701
702 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, retType, lm, colRegister, ColumnNames_t{});
703
704 // lifetime of pointees:
705 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
706 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
707 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
708 std::stringstream defineInvocation;
709 defineInvocation << "(const std::vector<std::string> &colNames, "
710 << "ROOT::Internal::RDF::RColumnRegister &colRegister, "
711 << "ROOT::Detail::RDF::RLoopManager &lm, "
712 << "void *jittedDefine, "
713 << "std::shared_ptr<void> *) {\n";
714 defineInvocation << " ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>("
715 << funcName << ", "
716 << "colNames, "
717 << "colRegister, "
718 << "lm, "
719 << "reinterpret_cast<ROOT::Detail::RDF::RJittedDefine *>(jittedDefine)"
720 << ");\n}\n";
721 lm.RegisterJitHelperCall(defineInvocation.str(), std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister),
722 {}, jittedDefine);
723 return jittedDefine;
724}
725
726/// Book the jitting of a Vary call
727std::shared_ptr<RJittedVariation>
728BookVariationJit(const std::vector<std::string> &colNames, std::string_view variationName,
729 const std::vector<std::string> &variationTags, std::string_view expression, RLoopManager &lm,
731{
732 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
733
734 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
735 const auto exprVarTypes =
736 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, nullptr, ds, "Vary", /*vector2RVec=*/true);
737 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
738 const auto type = RetTypeOfFunc(funcName);
739
740 if (type.rfind("ROOT::VecOps::RVec", 0) != 0) {
741 throw std::runtime_error(
742 "Jitted Vary expressions must return an RVec object. The following expression returns a " + type +
743 " instead:\n" + parsedExpr.fExpr);
744 }
745
746 auto jittedVariation = std::make_shared<RJittedVariation>(colNames, variationName, variationTags, type, colRegister,
747 lm, parsedExpr.fUsedCols);
748
749 // build invocation to JitVariationHelper
750 // variation tag (array of strings) passed as const char** plus size.
751 // lifetime of pointees:
752 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
753 // - jittedVariation: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
754 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
755 // - variedColsOnHeap: deleted by registration function
756 std::stringstream varyInvocation;
757 varyInvocation << "(const std::vector<std::string> &inputColNames, "
758 << "ROOT::Internal::RDF::RColumnRegister &colRegister, "
759 << "ROOT::Detail::RDF::RLoopManager &lm, "
760 << "void *jittedVariation, "
761 << "std::shared_ptr<void> *helperArg) {\n";
763 << " auto *variedColNamesAndTags = reinterpret_cast<std::shared_ptr<std::pair<std::vector<std::string>, "
764 "std::vector<std::string>>> *>(helperArg);"
765 << " ROOT::Internal::RDF::JitVariationHelper<" << (isSingleColumn ? "true" : "false") << ">(" << funcName
766 << ", "
767 << "inputColNames, "
768 << "colRegister, "
769 << "lm, "
770 << "reinterpret_cast<ROOT::Internal::RDF::RJittedVariation *>(jittedVariation), "
771 << "(*variedColNamesAndTags)->first, "
772 << "(*variedColNamesAndTags)->second"
773 << ");\n}\n";
774 lm.RegisterJitHelperCall(
775 varyInvocation.str(), std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister), parsedExpr.fUsedCols,
777 std::make_shared<std::pair<std::vector<std::string>, std::vector<std::string>>>(colNames, variationTags));
778 return jittedVariation;
779}
780
781// Jit and call something equivalent to "this->BuildAndBook<ColTypes...>(params...)"
782// (see comments in the body for actual jitted code)
783std::string JitBuildAction(const ColumnNames_t &cols, const std::type_info &helperArgType, const std::type_info &at,
784 TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds,
785 const bool vector2RVec)
786{
787 // retrieve type of action as a string
789 if (!actionTypeClass) {
790 std::string exceptionText = "An error occurred while inferring the action type of the operation.";
791 throw std::runtime_error(exceptionText);
792 }
793 const std::string actionTypeName = actionTypeClass->GetName();
794 const std::string actionTypeNameBase = actionTypeName.substr(actionTypeName.rfind(':') + 1);
795
796 // retrieve type of result of the action as a string
798 if (helperArgTypeName.empty()) {
800 }
801
802 // Build a call to CallBuildAction with the appropriate argument. When run through the interpreter, this code will
803 // just-in-time create an RAction object and it will assign it to its corresponding RJittedAction.
804 std::stringstream createAction_str;
805 createAction_str << "(const std::vector<std::string> &colNames, "
806 << "ROOT::Internal::RDF::RColumnRegister &colRegister, "
807 << "ROOT::Detail::RDF::RLoopManager &lm, "
808 << "void *jittedAction, "
809 << "std::shared_ptr<void> *helperArg) {\n";
810 createAction_str << " ROOT::Internal::RDF::CallBuildAction<" << actionTypeName;
812 for (auto &colType : columnTypeNames)
813 createAction_str << ", " << colType;
814 createAction_str << ">("
815 << "colNames, "
816 << "colRegister, "
817 << "lm, "
818 << "reinterpret_cast<ROOT::Internal::RDF::RJittedAction *>(jittedAction), " << nSlots << ", "
819 << "reinterpret_cast<std::shared_ptr<" << helperArgTypeName << "> *>(helperArg)"
820 << ");\n}\n";
821 return createAction_str.str();
822}
823
/// Return true if at least one of the given strings is empty, false otherwise
/// (in particular, false for an empty list).
bool AtLeastOneEmptyString(const std::vector<std::string_view> strings)
{
   return std::any_of(strings.begin(), strings.end(), [](std::string_view str) { return str.empty(); });
}
832
833std::shared_ptr<RNodeBase> UpcastNode(std::shared_ptr<RNodeBase> ptr)
834{
835 return ptr;
836}
837
838/// Given the desired number of columns and the user-provided list of columns:
839/// * fallback to using the first nColumns default columns if needed (or throw if nColumns > nDefaultColumns)
840/// * check that selected column names refer to valid branches, custom columns or datasource columns (throw if not)
841/// * replace column names from aliases by the actual column name
842/// Return the list of selected column names.
845{
846 auto selectedColumns = SelectColumns(nColumns, columns, lm.GetDefaultColumnNames());
847
848 for (auto &col : selectedColumns) {
849 col = colRegister.ResolveAlias(col);
850 }
851
852 // Complain if there are still unknown columns at this point
854
855 if (!unknownColumns.empty()) {
856 // Some columns are still unknown, we need to understand if the error
857 // should be printed or if the user requested to explicitly disable it.
858 // Look for a possible overlap between the unknown columns and the
859 // columns we should ignore for the purpose of the following exception
860 std::set<std::string> intersection;
861 const auto &colsToIgnore = lm.GetSuppressErrorsForMissingBranches();
862 std::sort(unknownColumns.begin(), unknownColumns.end());
863 std::set_intersection(unknownColumns.cbegin(), unknownColumns.cend(), colsToIgnore.cbegin(), colsToIgnore.cend(),
864 std::inserter(intersection, intersection.begin()));
865 if (intersection.empty()) {
866 std::string errMsg = std::string("Unknown column") + (unknownColumns.size() > 1 ? "s: " : ": ");
867 for (auto &unknownColumn : unknownColumns)
868 errMsg += '"' + unknownColumn + "\", ";
869 errMsg.resize(errMsg.size() - 2); // remove last ", "
870 throw std::runtime_error(errMsg);
871 }
872 }
873
874 return selectedColumns;
875}
876
878 TTree *tree, RDataSource *ds, const std::string &context,
879 bool vector2RVec)
880{
881 auto toCheckedArgType = [&](const std::string &c) {
882 RDFDetail::RDefineBase *define = colRegister.GetDefine(c);
883 const auto colType = ColumnName2ColumnTypeName(c, tree, ds, define, vector2RVec);
884 if (colType.rfind("CLING_UNKNOWN_TYPE", 0) == 0) { // the interpreter does not know this type
885 const auto msg =
886 "The type of custom column \"" + c + "\" (" + colType.substr(19) +
887 ") is not known to the interpreter, but a just-in-time-compiled " + context +
888 " call requires this column. Make sure to create and load ROOT dictionaries for this column's class.";
889 throw std::runtime_error(msg);
890 }
891 return colType;
892 };
893 std::vector<std::string> colTypes;
894 colTypes.reserve(colNames.size());
895 std::transform(colNames.begin(), colNames.end(), std::back_inserter(colTypes), toCheckedArgType);
896 return colTypes;
897}
898
/// Throw if the same column name appears more than once in cols.
/// \throws std::logic_error naming the first column that is found to be repeated.
void CheckForDuplicateSnapshotColumns(const std::vector<std::string> &cols)
{
   std::unordered_set<std::string> uniqueCols;
   for (const auto &col : cols) {
      // insert().second is false when the name was already in the set, i.e. this is a repetition
      const bool isNewColumn = uniqueCols.insert(col).second;
      if (!isNewColumn) {
         const auto msg = "Error: column \"" + col +
                          "\" was passed to Snapshot twice. This is not supported: only one of the columns would be "
                          "readable with RDataFrame.";
         throw std::logic_error(msg);
      }
   }
}
911
913{
915 std::string optionName;
916
917 if (opts.fOutputFormat == ROOT::RDF::ESnapshotOutputFormat::kTTree ||
919 if (opts.fApproxZippedClusterSize != defaultSnapshotOpts.fApproxZippedClusterSize) {
920 optionName = "fApproxZippedClusterSize";
921 } else if (opts.fMaxUnzippedClusterSize != defaultSnapshotOpts.fMaxUnzippedClusterSize) {
922 optionName = "fMaxUnzippedClusterSize";
923 } else if (opts.fInitialUnzippedPageSize != defaultSnapshotOpts.fInitialUnzippedPageSize) {
924 optionName = "fInitialUnzippedPageSize";
925 } else if (opts.fMaxUnzippedPageSize != defaultSnapshotOpts.fMaxUnzippedPageSize) {
926 optionName = "fMaxUnzippedPageSize";
927 } else if (opts.fEnablePageChecksums != defaultSnapshotOpts.fEnablePageChecksums) {
928 optionName = "fEnablePageChecksums";
929 } else if (opts.fEnableSamePageMerging != defaultSnapshotOpts.fEnableSamePageMerging) {
930 optionName = "fEnableSamePageMerging";
931 }
932
933 if (!optionName.empty()) {
934 Warning("Snapshot",
935 "The RNTuple-specific %s option in RSnapshotOptions has been set, but the output format is "
936 "set to TTree, so this option won't have any effect. Use the other options available in "
937 "RSnapshotOptions to "
938 "configure the output TTree. Alternatively, change fOutputFormat to snapshot to RNTuple instead.",
939 optionName.c_str());
940 }
941 } else if (opts.fOutputFormat == ROOT::RDF::ESnapshotOutputFormat::kRNTuple) {
942 if (opts.fAutoFlush != defaultSnapshotOpts.fAutoFlush) {
943 optionName = "fAutoFlush";
944 } else if (opts.fSplitLevel != defaultSnapshotOpts.fSplitLevel) {
945 optionName = "fSplitLevel";
946 } else if (opts.fBasketSize != defaultSnapshotOpts.fBasketSize) {
947 optionName = "fBasketSize";
948 }
949
950 if (!optionName.empty()) {
951 Warning(
952 "Snapshot",
953 "The TTree-specific %s option in RSnapshotOptions has been set, but the output format is set to RNTuple, "
954 "so this option won't have any effect. Use the fNTupleWriteOptions option available in RSnapshotOptions to "
955 "configure the output RNTuple. Alternatively, change fOutputFormat to snapshot to TTree instead.",
956 optionName.c_str());
957 }
958 }
959}
960
961/// Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array branches added
962/// in the right positions (i.e. before the array branches that need them).
963std::pair<std::vector<std::string>, std::vector<std::string>>
965 std::vector<std::string> &&colsWithAliases)
966{
967 TTree *tree{};
968 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(ds))
969 tree = treeDS->GetTree();
970 if (!tree) // nothing to do
971 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
972
973 assert(colsWithoutAliases.size() == colsWithAliases.size());
974
975 auto nCols = colsWithoutAliases.size();
976 // Use index-iteration as we modify the vector during the iteration.
977 for (std::size_t i = 0u; i < nCols; ++i) {
978 const auto &colName = colsWithoutAliases[i];
979
980 auto *b = tree->GetBranch(colName.c_str());
981 if (!b) // try harder
982 b = tree->FindBranch(colName.c_str());
983
984 if (!b)
985 continue;
986
987 auto *leaves = b->GetListOfLeaves();
988 if (b->IsA() != TBranch::Class() || leaves->GetEntries() != 1)
989 continue; // this branch is not a variable-sized array, nothing to do
990
991 TLeaf *countLeaf = static_cast<TLeaf *>(leaves->At(0))->GetLeafCount();
992 if (!countLeaf || IsStrInVec(countLeaf->GetName(), colsWithoutAliases))
993 continue; // not a variable-sized array or the size branch is already there, nothing to do
994
995 // otherwise we must insert the size in colsWithoutAliases _and_ colsWithAliases
996 colsWithoutAliases.insert(colsWithoutAliases.begin() + i, countLeaf->GetName());
997 colsWithAliases.insert(colsWithAliases.begin() + i, countLeaf->GetName());
998 ++nCols;
999 ++i; // as we inserted an element in the vector we iterate over, we need to move the index forward one extra time
1000 }
1001
1002 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
1003}
1004
/// Remove repeated names from columnNames in place.
/// The first occurrence of each name is kept and the relative order of the remaining names is preserved.
void RemoveDuplicates(std::vector<std::string> &columnNames)
{
   // Names seen so far; insert().second is false for a name that is already in the set.
   std::unordered_set<std::string> uniqueCols;
   const auto isRepeated = [&uniqueCols](const std::string &colName) { return !uniqueCols.insert(colName).second; };
   columnNames.erase(std::remove_if(columnNames.begin(), columnNames.end(), isRepeated), columnNames.end());
}
1013
1015{
1017
1018 std::copy_if(columnNames.cbegin(), columnNames.cend(), std::back_inserter(parentFields),
1019 [](const std::string &colName) { return colName.find('.') == std::string::npos; });
1020
1021 columnNames.erase(std::remove_if(columnNames.begin(), columnNames.end(),
1022 [&parentFields](const std::string &colName) {
1023 if (colName.find('.') == std::string::npos)
1024 return false;
1025 const auto parentFieldName = colName.substr(0, colName.find_first_of('.'));
1026 return std::find(parentFields.cbegin(), parentFields.cend(), parentFieldName) !=
1027 parentFields.end();
1028 }),
1029 columnNames.end());
1030}
1031} // namespace RDF
1032} // namespace Internal
1033} // namespace ROOT
1034
1035namespace {
1036void AddDataSourceColumn(const std::string &colName, const std::type_info &typeID, ROOT::Detail::RDF::RLoopManager &lm,
1038{
1039
1040 if (colRegister.IsDefineOrAlias(colName))
1041 return;
1042
1043 if (lm.HasDataSourceColumnReaders(colName, typeID))
1044 return;
1045
1046 if (!ds.HasColumn(colName) &&
1047 lm.GetSuppressErrorsForMissingBranches().find(colName) == lm.GetSuppressErrorsForMissingBranches().end())
1048 return;
1049
1050 const auto nSlots = lm.GetNSlots();
1051 std::vector<std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>> colReaders;
1052 colReaders.reserve(nSlots);
1053 // TODO consider changing the interface so we return all of these for all slots in one go
1054 for (auto slot = 0u; slot < nSlots; ++slot)
1055 colReaders.emplace_back(
1056 ROOT::Internal::RDF::CreateColumnReader(ds, slot, colName, typeID, /*treeReader*/ nullptr));
1057
1058 lm.AddDataSourceColumnReaders(colName, std::move(colReaders), typeID);
1059}
1060} // namespace
1061
1062void ROOT::Internal::RDF::AddDSColumns(const std::vector<std::string> &colNames, ROOT::Detail::RDF::RLoopManager &lm,
1064 const std::vector<const std::type_info *> &colTypeIDs,
1066{
1067 auto nCols = colNames.size();
1068 assert(nCols == colTypeIDs.size() && "Must provide exactly one column type for each column to create");
1069 for (decltype(nCols) i{}; i < nCols; i++) {
1071 }
1072}
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:252
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char cname
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char text
char name[80]
Definition TGX11.cxx:110
R__EXTERN TVirtualMutex * gROOTMutex
Definition TROOT.h:63
#define gROOT
Definition TROOT.h:414
#define R__LOCKGUARD(mutex)
#define free
Definition civetweb.c:1578
The head node of a RDF computation graph.
A binder for user-defined columns, variations and aliases.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
const_iterator begin() const
const_iterator end() const
static TClass * Class()
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2973
A TLeaf describes individual elements of a TBranch. See TBranch structure in TTree.
Definition TLeaf.h:57
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Definition TPRegexp.h:78
Basic string class.
Definition TString.h:138
A TTree represents a columnar dataset.
Definition TTree.h:89
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister)
Book the jitting of a DefinePerSample call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
void RemoveDuplicates(ColumnNames_t &columnNames)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:191
void CheckSnapshotOptionsFormatCompatibility(const ROOT::RDF::RSnapshotOptions &opts)
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition RDFUtils.cxx:536
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > prevNode, std::string_view name, std::string_view expression, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
std::string JitBuildAction(const ColumnNames_t &cols, const std::type_info &helperArgType, const std::type_info &at, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, const bool vector2RVec)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(ROOT::RDF::RDataSource &ds, unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *treeReader)
Definition RDFUtils.cxx:684
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(ROOT::RDF::RDataSource *ds, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:325
void RemoveRNTupleSubFields(ColumnNames_t &columnNames)
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
Definition Utils.hxx:272
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:478
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn)
Book the jitting of a Vary call.
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
Definition RDFUtils.cxx:429
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2RVec)
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister)
Book the jitting of a Define call.
std::vector< std::string > ColumnNames_t
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
BVH_ALWAYS_INLINE T dot(const Vec< T, N > &a, const Vec< T, N > &b)
Definition vec.h:98
A collection of options to steer the creation of the dataset on disk through Snapshot().