Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RDFInterfaceUtils.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include <ROOT/RDataSource.hxx>
12#include <ROOT/RTTreeDS.hxx>
15#include <ROOT/RDF/RDisplay.hxx>
20#include "ROOT/RLogger.hxx"
22#include <ROOT/RDF/Utils.hxx>
23#include <string_view>
24#include <TBranch.h>
25#include <TClass.h>
26#include <TClassEdit.h>
27#include <TDataType.h>
28#include <TError.h>
29#include <TLeaf.h>
30#include <TObjArray.h>
31#include <TPRegexp.h>
32#include <TROOT.h>
33#include <TString.h>
34#include <TTree.h>
35#include <TVirtualMutex.h>
36
37// Silence the GCC warnings (-Woverloaded-virtual, -Wshadow) that would otherwise
38// be emitted while compiling the lexertk.hpp header included below.
39#if defined(__GNUC__)
40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Woverloaded-virtual"
42#pragma GCC diagnostic ignored "-Wshadow"
43#endif
44#include "lexertk.hpp"
45#if defined(__GNUC__)
46#pragma GCC diagnostic pop
47#endif
48
49#include <algorithm>
50#include <cassert>
51#include <cstdlib> // for size_t
52#include <iterator> // for back_insert_iterator
53#include <map>
54#include <memory>
55#include <set>
56#include <sstream>
57#include <stdexcept>
58#include <string>
59#include <typeinfo>
60#include <unordered_map>
61#include <unordered_set>
62#include <utility> // for pair
63#include <vector>
64
65namespace ROOT::Detail::RDF {
66class RDefineBase;
67}
68
69namespace {
72
73/// A string expression such as those passed to Filter and Define, digested to a standardized form
74struct ParsedExpression {
75 /// The string expression with the dummy variable names in fVarNames in place of the original column names
76 std::string fExpr;
77 /// The list of valid column names that were used in the original string expression.
78 /// Duplicates are removed and column aliases (created with Alias calls) are resolved.
79 ColumnNames_t fUsedCols;
80 /// The list of variable names used in fExpr, with same ordering and size as fUsedCols
81 ColumnNames_t fVarNames;
82};
83
84/// Look at expression `expr` and return a pair of (column names used, aliases used)
85std::pair<ColumnNames_t, ColumnNames_t> FindUsedColsAndAliases(const std::string &expr,
86 const ROOT::Internal::RDF::RColumnRegister &colRegister,
87 const ColumnNames_t &dataSourceColNames)
88{
89 lexertk::generator tokens;
90 const auto tokensOk = tokens.process(expr);
91 if (!tokensOk) {
92 const auto msg = "Failed to tokenize expression:\n" + expr + "\n\nMake sure it is valid C++.";
93 throw std::runtime_error(msg);
94 }
95
96 std::unordered_set<std::string> usedCols;
97 std::unordered_set<std::string> usedAliases;
98
99 // iterate over tokens in expression and fill usedCols and usedAliases
100 const auto nTokens = tokens.size();
101 const auto kSymbol = lexertk::token::e_symbol;
102 for (auto i = 0u; i < nTokens; ++i) {
103 const auto &tok = tokens[i];
104 // lexertk classifies '&' as e_symbol for some reason
105 if (tok.type != kSymbol || tok.value == "&" || tok.value == "|") {
106 // token is not a potential variable name, skip it
107 continue;
108 }
109
110 ColumnNames_t potentialColNames({tok.value});
111
112 // if token is the start of a dot chain (a.b.c...), a.b, a.b.c etc. are also potential column names
113 auto dotChainKeepsGoing = [&](unsigned int _i) {
114 return _i + 2 <= nTokens && tokens[_i + 1].value == "." && tokens[_i + 2].type == kSymbol;
115 };
116 while (dotChainKeepsGoing(i)) {
117 potentialColNames.emplace_back(potentialColNames.back() + "." + tokens[i + 2].value);
118 i += 2; // consume the tokens we looked at
119 }
120
121 // in an expression such as `a.b`, if `a` is a column alias add it to `usedAliases` and
122 // replace the alias with the real column name in `potentialColNames`.
123 const auto maybeAnAlias = potentialColNames[0]; // intentionally a copy as we'll modify potentialColNames later
124 const auto &resolvedAlias = colRegister.ResolveAlias(maybeAnAlias);
125 if (resolvedAlias != maybeAnAlias) { // this is an alias
126 usedAliases.insert(maybeAnAlias);
127 for (auto &s : potentialColNames)
128 s.replace(0, maybeAnAlias.size(), resolvedAlias);
129 }
130
131 // find the longest potential column name that is an actual column name
132 // (potential columns are sorted by length, so we search from the end to find the longest)
133 auto isRDFColumn = [&](const std::string &col) {
134 if (colRegister.IsDefineOrAlias(col) || IsStrInVec(col, dataSourceColNames))
135 return true;
136 return false;
137 };
138 const auto longestRDFColMatch = std::find_if(potentialColNames.crbegin(), potentialColNames.crend(), isRDFColumn);
139 if (longestRDFColMatch != potentialColNames.crend())
140 usedCols.insert(*longestRDFColMatch);
141 }
142
143 return {{usedCols.begin(), usedCols.end()}, {usedAliases.begin(), usedAliases.end()}};
144}
145
146/// Substitute each '.' in a string with '\.'
147std::string EscapeDots(const std::string &s)
148{
149 TString out(s);
150 TPRegexp dot("\\.");
151 dot.Substitute(out, "\\.", "g");
152 return std::string(std::move(out));
153}
154
155TString ResolveAliases(const TString &expr, const ColumnNames_t &usedAliases,
156 const ROOT::Internal::RDF::RColumnRegister &colRegister)
157{
158 TString out(expr);
159
160 for (const auto &alias : usedAliases) {
161 const auto &col = colRegister.ResolveAlias(alias);
162 TPRegexp replacer("\\b" + EscapeDots(alias) + "\\b");
163 replacer.Substitute(out, col.data(), "g");
164 }
165
166 return out;
167}
168
169ParsedExpression ParseRDFExpression(std::string_view expr, const ROOT::Internal::RDF::RColumnRegister &colRegister,
170 const ColumnNames_t &dataSourceColNames)
171{
172 // transform `#var` into `R_rdf_sizeof_var`
173 TString preProcessedExpr(expr);
174 // match #varname at beginning of the sentence or after not-a-word, but exclude preprocessor directives like #ifdef
175 TPRegexp colSizeReplacer(
176 "(^|\\W)#(?!(ifdef|ifndef|if|else|elif|endif|pragma|define|undef|include|line))([a-zA-Z_][a-zA-Z0-9_]*)");
177 colSizeReplacer.Substitute(preProcessedExpr, "$1R_rdf_sizeof_$3", "g");
178
179 ColumnNames_t usedCols;
180 ColumnNames_t usedAliases;
181 std::tie(usedCols, usedAliases) =
182 FindUsedColsAndAliases(std::string(preProcessedExpr), colRegister, dataSourceColNames);
183
184 const auto exprNoAliases = ResolveAliases(preProcessedExpr, usedAliases, colRegister);
185
186 // when we are done, exprWithVars willl be the same as preProcessedExpr but column names will be substituted with
187 // the dummy variable names in varNames
188 TString exprWithVars(exprNoAliases);
189
190 ColumnNames_t varNames(usedCols.size());
191 for (auto i = 0u; i < varNames.size(); ++i)
192 varNames[i] = "var" + std::to_string(i);
193
194 // sort the vector usedColsAndAliases by decreasing length of its elements,
195 // so in case of friends we guarantee we never substitute a column name with another column containing it
196 // ex. without sorting when passing "x" and "fr.x", the replacer would output "var0" and "fr.var0",
197 // because it has already substituted "x", hence the "x" in "fr.x" would be recognized as "var0",
198 // whereas the desired behaviour is handling them as "var0" and "var1"
199 std::sort(usedCols.begin(), usedCols.end(),
200 [](const std::string &a, const std::string &b) { return a.size() > b.size(); });
201 for (const auto &col : usedCols) {
202 const auto varIdx = std::distance(usedCols.begin(), std::find(usedCols.begin(), usedCols.end(), col));
203 TPRegexp replacer("\\b" + EscapeDots(col) + "\\b");
204 replacer.Substitute(exprWithVars, varNames[varIdx], "g");
205 }
206
207 return ParsedExpression{std::string(std::move(exprWithVars)), std::move(usedCols), std::move(varNames)};
208}
209
/// Accessor for the static map of Filter/Define functions that have been jitted.
/// The map tells whether a given expression has already been jitted and, if so,
/// which jitted name is associated with it.
/// Key: the body of the expression; value: the name of the corresponding jitted entity.
/// For example, for:
///     auto f1(){ return 42; }
/// the key would be "(){ return 42; }" and the value would be "f1".
std::unordered_map<std::string, std::string> &GetJittedExprs()
{
   using ExprCache_t = std::unordered_map<std::string, std::string>;
   // function-local static: constructed on first call, the same object is returned by every call
   static ExprCache_t sJittedExprCache;
   return sJittedExprCache;
}
221
222std::string BuildFunctionString(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes,
223 bool isSingleColumn = false, const std::string &varyColType = "")
224{
225 assert(vars.size() == varTypes.size());
226
227 TPRegexp re(R"(\breturn\b)");
228 const bool hasReturnStmt = re.MatchB(expr);
229
230 static const std::vector<std::string> fundamentalTypes = {
231 "int",
232 "signed",
233 "signed int",
234 "Int_t",
235 "unsigned",
236 "unsigned int",
237 "UInt_t",
238 "double",
239 "Double_t",
240 "float",
241 "Float_t",
242 "char",
243 "Char_t",
244 "unsigned char",
245 "UChar_t",
246 "bool",
247 "Bool_t",
248 "short",
249 "short int",
250 "Short_t",
251 "long",
252 "long int",
253 "long long int",
254 "Long64_t",
255 "unsigned long",
256 "unsigned long int",
257 "ULong64_t",
258 "std::size_t",
259 "size_t",
260 "Ssiz_t"
261 };
262
263 std::stringstream ss;
264 ss << "(";
265 for (auto i = 0u; i < vars.size(); ++i) {
266 std::string fullType;
267 const auto &type = varTypes[i];
268 if (std::find(fundamentalTypes.begin(), fundamentalTypes.end(), type) != fundamentalTypes.end()) {
269 // pass it by const value to help detect common mistakes such as if(x = 3)
270 fullType = "const " + type + " ";
271 } else {
272 // We pass by reference to avoid expensive copies
273 // It can't be const reference in general, as users might want/need to call non-const methods on the values
274 fullType = type + "& ";
275 }
276 ss << fullType << vars[i] << ", ";
277 }
278 if (!vars.empty())
279 ss.seekp(-2, ss.cur);
280
281 // When building the function expression for a Vary call, we try to help the
282 // user by removing the need to explicitly write the vector return type.
283 // For now, Vary works by returning a (nested) RVec, depending on how many
284 // variables need to vary in lockstep.
285 auto finalizeExprForVary = [&]() {
286 std::string trailRetType{};
287 // Trim formatting characters at the extremes of the user expression
288 auto first_not_space = expr.find_first_not_of(" \n\t");
289 auto last_not_space = expr.find_last_not_of(" \n\t");
290 if (first_not_space != std::string::npos && last_not_space != std::string::npos && expr[first_not_space] == '{' &&
291 expr[last_not_space] == '}') {
292 // User expression is of type '{...}', a potential constructor for an
293 // RVec. At the same time, they have not decided the RVec return type
294 // Add trailing return type for the convenience of the user
295 // The innermost value type is by default the type of the first given column
296 trailRetType = " -> ";
297 if (isSingleColumn)
298 trailRetType += "ROOT::RVec<" + varyColType + ">";
299 else
300 trailRetType += "ROOT::RVec<ROOT::RVec<" + varyColType + ">>";
301 trailRetType += ' ';
302 }
303 std::string trailRetToken{trailRetType.empty() ? ") {" : ')' + trailRetType + '{'};
304 if (!hasReturnStmt)
305 trailRetToken += " return ";
306 return trailRetToken;
307 };
308
309 if (!varyColType.empty())
310 ss << finalizeExprForVary();
311 else
312 ss << (hasReturnStmt ? ") {" : ") { return ");
313
314 // Must inject \n to avoid cases where the user puts a comment after the expression
315 ss << expr << "\n;}\n";
316
317 return ss.str();
318}
319
320/// Declare a function to the interpreter in namespace R_rdf, return the name of the jitted function.
321/// If the function is already in GetJittedExprs, return the name for the function that has already been jitted.
322std::string DeclareFunction(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes,
323 bool isSingleColumn = false, const std::string &varyColType = "")
324{
326
327 const auto funcCode = BuildFunctionString(expr, vars, varTypes, isSingleColumn, varyColType);
328 auto &exprMap = GetJittedExprs();
329 const auto exprIt = exprMap.find(funcCode);
330 if (exprIt != exprMap.end()) {
331 // expression already there
332 const auto funcName = exprIt->second;
333 return funcName;
334 }
335
336 // new expression
337 const auto funcBaseName = "func" + std::to_string(exprMap.size());
338 const auto funcFullName = "R_rdf::" + funcBaseName;
339
340 const auto toDeclare = "namespace R_rdf {\nauto " + funcBaseName + funcCode + "\nusing " + funcBaseName +
341 "_ret_t = typename ROOT::TypeTraits::CallableTraits<decltype(" + funcBaseName +
342 ")>::ret_type;\n}";
344
345 // InterpreterDeclare could throw. If it doesn't, mark the function as already jitted
346 exprMap.insert({funcCode, funcFullName});
347
348 return funcFullName;
349}
350
351/// Each jitted function comes with a func_ret_t type alias for its return type.
352/// Resolve that alias and return the true type as string.
353std::string RetTypeOfFunc(const std::string &funcName)
354{
355 const auto dt = gROOT->GetType((funcName + "_ret_t").c_str());
356 R__ASSERT(dt != nullptr);
357 const auto type = dt->GetFullTypeName();
358 return type;
359}
360
361[[noreturn]] void
362ThrowJitBuildActionHelperTypeError(const std::string &actionTypeNameBase, const std::type_info &helperArgType)
363{
364 int err = 0;
365 const char *cname = TClassEdit::DemangleTypeIdName(helperArgType, err);
366 std::string actionHelperTypeName = cname;
367 delete[] cname;
368 if (err != 0)
369 actionHelperTypeName = helperArgType.name();
370
371 std::string exceptionText =
372 "RDataFrame::Jit: cannot just-in-time compile a \"" + actionTypeNameBase + "\" action using helper type \"" +
373 actionHelperTypeName +
374 "\". This typically happens in a custom `Fill` or `Book` invocation where the types of the input columns have "
375 "not been specified as template parameters and the ROOT interpreter has no knowledge of this type of action "
376 "helper. Please add template parameters for the types of the input columns to avoid jitting this action (i.e. "
377 "`df.Fill<float>(..., {\"x\"})`, where `float` is the type of `x`) or declare the action helper type to the "
378 "interpreter, e.g. via gInterpreter->Declare.";
379
380 throw std::runtime_error(exceptionText);
381}
382
383} // anonymous namespace
384
385namespace ROOT {
386namespace Internal {
387namespace RDF {
388
389/// Take a list of column names, return that list with entries starting by '#' filtered out.
390/// The function throws when filtering out a column this way.
391ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
392{
393 ColumnNames_t columnListWithoutSizeColumns;
394 ColumnNames_t filteredColumns;
395 std::copy_if(columnNames.begin(), columnNames.end(), std::back_inserter(columnListWithoutSizeColumns),
396 [&](const std::string &name) {
397 if (name[0] == '#') {
398 filteredColumns.emplace_back(name);
399 return false;
400 } else {
401 return true;
402 }
403 });
404
405 if (!filteredColumns.empty()) {
406 std::string msg = "Column name(s) {";
407 for (auto &c : filteredColumns)
408 msg += c + ", ";
409 msg[msg.size() - 2] = '}';
410 msg += "will be ignored. Please go through a valid Alias to " + action + " an array size column";
411 throw std::runtime_error(msg);
412 }
413
414 return columnListWithoutSizeColumns;
415}
416
/// Throw if `var` is not usable as a C++ variable name.
///
/// A valid name is non-empty, starts with an ASCII letter or an underscore, and contains only
/// ASCII letters, digits and underscores.
///
/// \param[in] var   The candidate name.
/// \param[in] where Name of the calling operation; used in the error message ("Define" selects the
///                  word "column", anything else the word "variation").
/// \throws std::runtime_error if the name is not valid.
void CheckValidCppVarName(std::string_view var, const std::string &where)
{
   const auto isALetter = [](char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); };
   const auto isANumber = [](char c) { return c >= '0' && c <= '9'; };
   const auto isValidTok = [&](char c) { return c == '_' || isALetter(c) || isANumber(c); };

   // The emptiness check must come first: reading the first character of an empty view is undefined behaviour
   const bool isValid = !var.empty() && (var.front() == '_' || isALetter(var.front())) &&
                        std::all_of(var.begin(), var.end(), isValidTok);

   if (!isValid) {
      const auto objName = where == "Define" ? "column" : "variation";
      const auto error = "RDataFrame::" + where + ": cannot define " + objName + " \"" + std::string(var) +
                         "\". Not a valid C++ variable name.";
      throw std::runtime_error(error);
   }
}
445
446std::string DemangleTypeIdName(const std::type_info &typeInfo)
447{
448 int dummy(0);
449 char *tn = TClassEdit::DemangleTypeIdName(typeInfo, dummy);
450 std::string tname(tn);
451 free(tn);
452 return tname;
453}
454
456ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
457{
458 const auto theRegexSize = columnNameRegexp.size();
459 std::string theRegex(columnNameRegexp);
460
461 const auto isEmptyRegex = 0 == theRegexSize;
462 // This is to avoid cases where branches called b1, b2, b3 are all matched by expression "b"
463 if (theRegexSize > 0 && theRegex[0] != '^')
464 theRegex = "^" + theRegex;
465 if (theRegexSize > 0 && theRegex[theRegexSize - 1] != '$')
466 theRegex = theRegex + "$";
467
468 ColumnNames_t selectedColumns;
469
470 // Since we support gcc48 and it does not provide in its stl std::regex,
471 // we need to use TPRegexp
472 TPRegexp regexp(theRegex);
473 for (auto &&colName : colNames) {
474 if ((isEmptyRegex || regexp.MatchB(colName.c_str())) && !IsInternalColumn(colName)) {
475 selectedColumns.emplace_back(colName);
476 }
477 }
478
479 if (selectedColumns.empty()) {
480 std::string text(callerName);
481 if (columnNameRegexp.empty()) {
482 text = ": there is no column available to match.";
483 } else {
484 text = ": regex \"" + std::string(columnNameRegexp) + "\" did not match any column.";
485 }
486 throw std::runtime_error(text);
487 }
488 return selectedColumns;
489}
490
491/// Throw if column `definedColView` is already there.
492void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister,
493 const ColumnNames_t &dataSourceColumns)
494{
495
496 std::string error{};
497 if (colRegister.IsAlias(definedColView))
498 error = "An alias with that name, pointing to column \"" + std::string(colRegister.ResolveAlias(definedColView)) +
499 "\", already exists in this branch of the computation graph.";
500 else if (colRegister.IsDefineOrAlias(definedColView))
501 error = "A column with that name has already been Define'd. Use Redefine to force redefinition.";
502 else if (std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedColView) != dataSourceColumns.end())
503 error =
504 "A column with that name is already present in the input data source. Use Redefine to force redefinition.";
505
506 if (!error.empty()) {
507 error = "RDataFrame::" + where + ": cannot define column \"" + std::string(definedColView) + "\". " + error;
508 throw std::runtime_error(error);
509 }
510}
511
512/// Throw if column `definedColView` is _not_ already there.
513void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister,
514 const ColumnNames_t &dataSourceColumns)
515{
516 std::string error{};
517
518 if (colRegister.IsAlias(definedColView)) {
519 error = "An alias with that name, pointing to column \"" + std::string(colRegister.ResolveAlias(definedColView)) +
520 "\", already exists. Aliases cannot be Redefined or Varied.";
521 }
522
523 if (error.empty()) {
524 const bool isAlreadyDefined = colRegister.IsDefineOrAlias(definedColView);
525 const bool isADSColumn =
526 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedColView) != dataSourceColumns.end();
527
528 if (!isAlreadyDefined && !isADSColumn)
529 error = "No column with that name was found in the dataset. Use Define to create a new column.";
530 }
531
532 if (!error.empty()) {
533 if (where == "DefaultValueFor")
534 error = "RDataFrame::" + where + ": cannot provide default values for column \"" +
535 std::string(definedColView) + "\". " + error;
536 else
537 error = "RDataFrame::" + where + ": cannot redefine or vary column \"" + std::string(definedColView) + "\". " +
538 error;
539 throw std::runtime_error(error);
540 }
541}
542
543/// Throw if the column has systematic variations attached.
544void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
545{
546 const std::string definedCol(definedColView);
547 const auto &variationDeps = colRegister.GetVariationDeps(definedCol);
548 if (!variationDeps.empty()) {
549 if (where == "Redefine") {
550 const std::string error = "RDataFrame::" + where + ": cannot redefine column \"" + definedCol +
551 "\". The column depends on one or more systematic variations and re-defining varied "
552 "columns is not supported.";
553 throw std::runtime_error(error);
554 } else if (where == "DefaultValueFor") {
555 const std::string error = "RDataFrame::" + where + ": cannot provide a default value for column \"" +
556 definedCol +
557 "\". The column depends on one or more systematic variations and it should not be "
558 "possible to have missing values in varied columns.";
559 throw std::runtime_error(error);
560 } else {
561 const std::string error =
562 "RDataFrame::" + where + ": this operation cannot work with columns that depend on systematic variations.";
563 throw std::runtime_error(error);
564 }
565 }
566}
567
/// Throw a std::runtime_error if the number of template parameters differs from the number of column names.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
{
   if (nTemplateParams == nColumnNames)
      return;

   throw std::runtime_error("The number of template parameters specified is " + std::to_string(nTemplateParams) +
                            " while " + std::to_string(nColumnNames) + " columns have been specified.");
}
579
580/// Choose between local column names or default column names, throw in case of errors.
581const ColumnNames_t
582SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
583{
584 if (names.empty()) {
585 // use default column names
586 if (defaultNames.size() < nRequiredNames)
587 throw std::runtime_error(
588 std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
589 " required but none were provided and the default list has size " + std::to_string(defaultNames.size()));
590 // return first nRequiredNames default column names
591 return ColumnNames_t(defaultNames.begin(), defaultNames.begin() + nRequiredNames);
592 } else {
593 // use column names provided by the user to this particular transformation/action
594 if (names.size() != nRequiredNames) {
595 auto msg = std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
596 " required but " + std::to_string(names.size()) + (names.size() == 1 ? " was" : " were") +
597 " provided:";
598 for (const auto &name : names)
599 msg += " \"" + name + "\",";
600 msg.back() = '.';
601 throw std::runtime_error(msg);
602 }
603 return names;
604 }
605}
606
607ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const RColumnRegister &definedCols,
608 const ColumnNames_t &dataSourceColumns)
609{
610 ColumnNames_t unknownColumns;
611 for (auto &column : requiredCols) {
612 if (definedCols.IsDefineOrAlias(column))
613 continue;
614 const auto isDataSourceColumn =
615 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), column) != dataSourceColumns.end();
616 if (isDataSourceColumn)
617 continue;
618 unknownColumns.emplace_back(column);
619 }
620 return unknownColumns;
621}
622
623std::vector<std::string> GetFilterNames(const std::shared_ptr<RLoopManager> &loopManager)
624{
625 return loopManager->GetFiltersNames();
626}
627
628ParsedTreePath ParseTreePath(std::string_view fullTreeName)
629{
630 // split name into directory and treename if needed
631 std::string_view dirName = "";
632 std::string_view treeName = fullTreeName;
633 const auto lastSlash = fullTreeName.rfind('/');
634 if (std::string_view::npos != lastSlash) {
635 dirName = treeName.substr(0, lastSlash);
636 treeName = treeName.substr(lastSlash + 1, treeName.size());
637 }
638 return {std::string(treeName), std::string(dirName)};
639}
640
/// Format an address as a hexadecimal number with base prefix (e.g. "0x1f").
/// The pointer is converted to an integer before printing, so the output does not depend on how
/// the standard library formats pointers (the original comment calls this "Windows-friendly").
std::string PrettyPrintAddr(const void *const addr)
{
   std::ostringstream formatted;
   formatted.setf(std::ios_base::hex, std::ios_base::basefield);
   formatted.setf(std::ios_base::showbase);
   formatted << reinterpret_cast<size_t>(addr);
   return formatted.str();
}
648
649/// Book the jitting of a Filter call
650std::shared_ptr<RDFDetail::RJittedFilter>
651BookFilterJit(std::shared_ptr<RDFDetail::RNodeBase> prevNode, std::string_view name, std::string_view expression,
652 const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
653{
654 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
655
656 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
657 const auto exprVarTypes =
658 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, tree, ds, "Filter", /*vector2RVec=*/true);
659 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
660 const auto type = RetTypeOfFunc(funcName);
661 if (type != "bool")
662 throw std::runtime_error("Filter: the following expression does not evaluate to bool:\n" +
663 std::string(expression));
664
665 auto *lm = prevNode->GetLoopManagerUnchecked();
666 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(
667 lm, name, Union(colRegister.GetVariationDeps(parsedExpr.fUsedCols), prevNode->GetVariations()), prevNode);
668
669 // Produce code snippet that creates the filter and registers it with the corresponding RJittedFilter
670 std::stringstream filterInvocation;
671 filterInvocation << "(const std::vector<std::string> &colNames, "
672 << "ROOT::Internal::RDF::RColumnRegister &colRegister, "
673 << "ROOT::Detail::RDF::RLoopManager &lm, "
674 << "void *jittedFilter, "
675 << "std::shared_ptr<void> *) {\n";
676 filterInvocation << " ROOT::Internal::RDF::JitFilterHelper(" << funcName << ", "
677 << "colNames, "
678 << "colRegister, "
679 << "lm, "
680 << "reinterpret_cast<ROOT::Detail::RDF::RJittedFilter*>(jittedFilter)"
681 << ");\n}\n";
682 lm->RegisterJitHelperCall(filterInvocation.str(),
683 std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister), parsedExpr.fUsedCols,
684 jittedFilter);
685
686 return jittedFilter;
687}
688
689/// Book the jitting of a Define call
690std::shared_ptr<RJittedDefine> BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm,
691 RDataSource *ds, const RColumnRegister &colRegister)
692{
693 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
694
695 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
696 const auto exprVarTypes =
697 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, nullptr, ds, "Define", /*vector2RVec=*/true);
698 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
699 const auto type = RetTypeOfFunc(funcName);
700
701 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, type, lm, colRegister, parsedExpr.fUsedCols);
702
703 // lifetime of pointees:
704 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
705 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
706 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
707 std::stringstream defineInvocation;
708 defineInvocation << "(const std::vector<std::string> &colNames, "
709 << "ROOT::Internal::RDF::RColumnRegister &colRegister, "
710 << "ROOT::Detail::RDF::RLoopManager &lm, "
711 << "void *jittedDefine, "
712 << "std::shared_ptr<void> *) {\n";
713 defineInvocation << " ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>("
714 << funcName << ", "
715 << "colNames, "
716 << "colRegister, "
717 << "lm, "
718 << "reinterpret_cast<ROOT::Detail::RDF::RJittedDefine *>(jittedDefine)"
719 << ");\n}\n";
720 lm.RegisterJitHelperCall(defineInvocation.str(), std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister),
721 parsedExpr.fUsedCols, jittedDefine);
722
723 return jittedDefine;
724}
725
726/// Book the jitting of a DefinePerSample call
727std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std::string_view expression,
728 RLoopManager &lm, const RColumnRegister &colRegister)
729{
730 const auto funcName = DeclareFunction(std::string(expression), {"rdfslot_", "rdfsampleinfo_"},
731 {"unsigned int", "const ROOT::RDF::RSampleInfo"});
732 const auto retType = RetTypeOfFunc(funcName);
733
734 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, retType, lm, colRegister, ColumnNames_t{});
735
736 // lifetime of pointees:
737 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
738 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
739 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
740 std::stringstream defineInvocation;
741 defineInvocation << "(const std::vector<std::string> &colNames, "
742 << "ROOT::Internal::RDF::RColumnRegister &colRegister, "
743 << "ROOT::Detail::RDF::RLoopManager &lm, "
744 << "void *jittedDefine, "
745 << "std::shared_ptr<void> *) {\n";
746 defineInvocation << " ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>("
747 << funcName << ", "
748 << "colNames, "
749 << "colRegister, "
750 << "lm, "
751 << "reinterpret_cast<ROOT::Detail::RDF::RJittedDefine *>(jittedDefine)"
752 << ");\n}\n";
753 lm.RegisterJitHelperCall(defineInvocation.str(), std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister),
754 {}, jittedDefine);
755 return jittedDefine;
756}
757
758/// Book the jitting of a Vary call
759std::shared_ptr<RJittedVariation>
760BookVariationJit(const std::vector<std::string> &colNames, std::string_view variationName,
761 const std::vector<std::string> &variationTags, std::string_view expression, RLoopManager &lm,
762 RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn,
763 const std::string &varyColType)
764{
765 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
766
767 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
768 const auto exprVarTypes =
769 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, nullptr, ds, "Vary", /*vector2RVec=*/true);
770 const auto funcName =
771 DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes, isSingleColumn, varyColType);
772 const auto type = RetTypeOfFunc(funcName);
773
774 if (type.rfind("ROOT::VecOps::RVec", 0) != 0) {
775 throw std::runtime_error(
776 "Jitted Vary expressions must return an RVec object. The following expression return type is '" + type +
777 "' instead:\n" + parsedExpr.fExpr);
778 }
779
780 auto jittedVariation = std::make_shared<RJittedVariation>(colNames, variationName, variationTags, type, colRegister,
781 lm, parsedExpr.fUsedCols);
782
783 // build invocation to JitVariationHelper
784 // variation tag (array of strings) passed as const char** plus size.
785 // lifetime of pointees:
786 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
787 // - jittedVariation: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
788 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
789 // - variedColsOnHeap: deleted by registration function
790 std::stringstream varyInvocation;
791 varyInvocation << "(const std::vector<std::string> &inputColNames, "
792 << "ROOT::Internal::RDF::RColumnRegister &colRegister, "
793 << "ROOT::Detail::RDF::RLoopManager &lm, "
794 << "void *jittedVariation, "
795 << "std::shared_ptr<void> *helperArg) {\n";
796 varyInvocation
797 << " auto *variedColNamesAndTags = reinterpret_cast<std::shared_ptr<std::pair<std::vector<std::string>, "
798 "std::vector<std::string>>> *>(helperArg);"
799 << " ROOT::Internal::RDF::JitVariationHelper<" << (isSingleColumn ? "true" : "false") << ">(" << funcName
800 << ", "
801 << "inputColNames, "
802 << "colRegister, "
803 << "lm, "
804 << "reinterpret_cast<ROOT::Internal::RDF::RJittedVariation *>(jittedVariation), "
805 << "(*variedColNamesAndTags)->first, "
806 << "(*variedColNamesAndTags)->second"
807 << ");\n}\n";
809 varyInvocation.str(), std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister), parsedExpr.fUsedCols,
810 jittedVariation,
811 std::make_shared<std::pair<std::vector<std::string>, std::vector<std::string>>>(colNames, variationTags));
812 return jittedVariation;
813}
814
815// Jit and call something equivalent to "this->BuildAndBook<ColTypes...>(params...)"
816// (see comments in the body for actual jitted code)
817std::string JitBuildAction(const ColumnNames_t &cols, const std::type_info &helperArgType, const std::type_info &at,
818 TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds,
819 const bool vector2RVec)
820{
821 // retrieve type of action as a string
822 auto actionTypeClass = TClass::GetClass(at);
823 if (!actionTypeClass) {
824 std::string exceptionText = "An error occurred while inferring the action type of the operation.";
825 throw std::runtime_error(exceptionText);
826 }
827 const std::string actionTypeName = actionTypeClass->GetName();
828 const std::string actionTypeNameBase = actionTypeName.substr(actionTypeName.rfind(':') + 1);
829
830 // retrieve type of result of the action as a string
831 const auto helperArgTypeName = TypeID2TypeName(helperArgType);
832 if (helperArgTypeName.empty()) {
833 ThrowJitBuildActionHelperTypeError(actionTypeNameBase, helperArgType);
834 }
835
836 // Build a call to CallBuildAction with the appropriate argument. When run through the interpreter, this code will
837 // just-in-time create an RAction object and it will assign it to its corresponding RJittedAction.
838 std::stringstream createAction_str;
839 createAction_str << "(const std::vector<std::string> &colNames, "
840 << "ROOT::Internal::RDF::RColumnRegister &colRegister, "
841 << "ROOT::Detail::RDF::RLoopManager &lm, "
842 << "void *jittedAction, "
843 << "std::shared_ptr<void> *helperArg) {\n";
844 createAction_str << " ROOT::Internal::RDF::CallBuildAction<" << actionTypeName;
845 const auto columnTypeNames = GetValidatedArgTypes(cols, colRegister, tree, ds, actionTypeNameBase, vector2RVec);
846 for (auto &colType : columnTypeNames)
847 createAction_str << ", " << colType;
848 createAction_str << ">("
849 << "colNames, "
850 << "colRegister, "
851 << "lm, "
852 << "reinterpret_cast<ROOT::Internal::RDF::RJittedAction *>(jittedAction), " << nSlots << ", "
853 << "reinterpret_cast<std::shared_ptr<" << helperArgTypeName << "> *>(helperArg)"
854 << ");\n}\n";
855 return createAction_str.str();
856}
857
/// Return true if at least one of the given string views is empty, false otherwise (also for an empty list).
bool AtLeastOneEmptyString(const std::vector<std::string_view> strings)
{
   return std::any_of(strings.begin(), strings.end(),
                      [](const std::string_view &candidate) { return candidate.empty(); });
}
866
867std::shared_ptr<RNodeBase> UpcastNode(std::shared_ptr<RNodeBase> ptr)
868{
869 return ptr;
870}
871
872/// Given the desired number of columns and the user-provided list of columns:
873/// * fallback to using the first nColumns default columns if needed (or throw if nColumns > nDefaultColumns)
874/// * check that selected column names refer to valid branches, custom columns or datasource columns (throw if not)
875/// * replace column names from aliases by the actual column name
876/// Return the list of selected column names.
877ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns,
878 const RColumnRegister &colRegister, RDataSource *ds)
879{
880 auto selectedColumns = SelectColumns(nColumns, columns, lm.GetDefaultColumnNames());
881
882 for (auto &col : selectedColumns) {
883 col = colRegister.ResolveAlias(col);
884 }
885
886 // Complain if there are still unknown columns at this point
887 auto unknownColumns = FindUnknownColumns(selectedColumns, colRegister, ds ? ds->GetColumnNames() : ColumnNames_t{});
888
889 if (!unknownColumns.empty()) {
890 // Some columns are still unknown, we need to understand if the error
891 // should be printed or if the user requested to explicitly disable it.
892 // Look for a possible overlap between the unknown columns and the
893 // columns we should ignore for the purpose of the following exception
894 std::set<std::string> intersection;
895 const auto &colsToIgnore = lm.GetSuppressErrorsForMissingBranches();
896 std::sort(unknownColumns.begin(), unknownColumns.end());
897 std::set_intersection(unknownColumns.cbegin(), unknownColumns.cend(), colsToIgnore.cbegin(), colsToIgnore.cend(),
898 std::inserter(intersection, intersection.begin()));
899 if (intersection.empty()) {
900 std::string errMsg = std::string("Unknown column") + (unknownColumns.size() > 1 ? "s: " : ": ");
901 for (auto &unknownColumn : unknownColumns)
902 errMsg += '"' + unknownColumn + "\", ";
903 errMsg.resize(errMsg.size() - 2); // remove last ", "
904 throw std::runtime_error(errMsg);
905 }
906 }
907
908 return selectedColumns;
909}
910
911std::vector<std::string> GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister,
912 TTree *tree, RDataSource *ds, const std::string &context,
913 bool vector2RVec)
914{
915 auto toCheckedArgType = [&](const std::string &c) {
916 RDFDetail::RDefineBase *define = colRegister.GetDefine(c);
917 const auto colType = ColumnName2ColumnTypeName(c, tree, ds, define, vector2RVec);
918 if (colType.rfind("CLING_UNKNOWN_TYPE", 0) == 0) { // the interpreter does not know this type
919 const auto msg =
920 "The type of custom column \"" + c + "\" (" + colType.substr(19) +
921 ") is not known to the interpreter, but a just-in-time-compiled " + context +
922 " call requires this column. Make sure to create and load ROOT dictionaries for this column's class.";
923 throw std::runtime_error(msg);
924 }
925 return colType;
926 };
927 std::vector<std::string> colTypes;
928 colTypes.reserve(colNames.size());
929 std::transform(colNames.begin(), colNames.end(), std::back_inserter(colTypes), toCheckedArgType);
930 return colTypes;
931}
932
/// Throw if the same column name appears more than once in the list of columns passed to Snapshot.
///
/// \param[in] cols Column names requested for the snapshot.
/// \throws std::logic_error naming the first column that is encountered for the second time.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
{
   std::unordered_set<std::string> uniqueCols;
   uniqueCols.reserve(cols.size());
   for (const auto &col : cols) {
      // insert() reports through .second whether the name was new
      if (!uniqueCols.insert(col).second) {
         const auto msg = "Error: column \"" + col +
                          "\" was passed to Snapshot twice. This is not supported: only one of the columns would be "
                          "readable with RDataFrame.";
         throw std::logic_error(msg);
      }
   }
}
945
947{
// Emits a Warning when an option that only applies to one output format differs from the value it has in a
// default-constructed RSnapshotOptions. Only the first differing option of a chain is reported.
// NOTE(review): the signature line and the two lines that open the per-format branches are not part of this
// listing; which value of fOutputFormat selects which branch must be confirmed against the full source.
948 const ROOT::RDF::RSnapshotOptions defaultSnapshotOpts;
// Name of the first option found to differ from its default; stays empty if none does.
949 std::string optionName;
950
// First chain: options that the warning text below calls RNTuple-specific (presumably checked when the output
// format is TTree -- confirm).
953 if (opts.fApproxZippedClusterSize != defaultSnapshotOpts.fApproxZippedClusterSize) {
954 optionName = "fApproxZippedClusterSize";
955 } else if (opts.fMaxUnzippedClusterSize != defaultSnapshotOpts.fMaxUnzippedClusterSize) {
956 optionName = "fMaxUnzippedClusterSize";
957 } else if (opts.fInitialUnzippedPageSize != defaultSnapshotOpts.fInitialUnzippedPageSize) {
958 optionName = "fInitialUnzippedPageSize";
959 } else if (opts.fMaxUnzippedPageSize != defaultSnapshotOpts.fMaxUnzippedPageSize) {
960 optionName = "fMaxUnzippedPageSize";
961 } else if (opts.fEnablePageChecksums != defaultSnapshotOpts.fEnablePageChecksums) {
962 optionName = "fEnablePageChecksums";
963 } else if (opts.fEnableSamePageMerging != defaultSnapshotOpts.fEnableSamePageMerging) {
964 optionName = "fEnableSamePageMerging";
965 }
966
967 if (!optionName.empty()) {
968 Warning("Snapshot",
969 "The RNTuple-specific %s option in RSnapshotOptions has been set, but the output format is "
970 "set to TTree, so this option won't have any effect. Use the other options available in "
971 "RSnapshotOptions to "
972 "configure the output TTree. Alternatively, change fOutputFormat to snapshot to RNTuple instead.",
973 optionName.c_str());
974 }
// Second chain: options that the warning text below calls TTree-specific (presumably checked when the output
// format is RNTuple -- confirm).
976 if (opts.fAutoFlush != defaultSnapshotOpts.fAutoFlush) {
977 optionName = "fAutoFlush";
978 } else if (opts.fSplitLevel != defaultSnapshotOpts.fSplitLevel) {
979 optionName = "fSplitLevel";
980 } else if (opts.fBasketSize != defaultSnapshotOpts.fBasketSize) {
981 optionName = "fBasketSize";
982 }
983
984 if (!optionName.empty()) {
985 Warning(
986 "Snapshot",
987 "The TTree-specific %s option in RSnapshotOptions has been set, but the output format is set to RNTuple, "
988 "so this option won't have any effect. Use the fNTupleWriteOptions option available in RSnapshotOptions to "
989 "configure the output RNTuple. Alternatively, change fOutputFormat to snapshot to TTree instead.",
990 optionName.c_str());
991 }
992 }
993}
994
995/// Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array branches added
996/// in the right positions (i.e. before the array branches that need them).
997std::pair<std::vector<std::string>, std::vector<std::string>>
998AddSizeBranches(ROOT::RDF::RDataSource *ds, std::vector<std::string> &&colsWithoutAliases,
999 std::vector<std::string> &&colsWithAliases)
1000{
1001 TTree *tree{};
1002 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(ds))
1003 tree = treeDS->GetTree();
1004 if (!tree) // nothing to do
1005 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
1006
1007 assert(colsWithoutAliases.size() == colsWithAliases.size());
1008
1009 auto nCols = colsWithoutAliases.size();
1010 // Use index-iteration as we modify the vector during the iteration.
1011 for (std::size_t i = 0u; i < nCols; ++i) {
1012 const auto &colName = colsWithoutAliases[i];
1013
1014 auto *b = tree->GetBranch(colName.c_str());
1015 if (!b) // try harder
1016 b = tree->FindBranch(colName.c_str());
1017
1018 if (!b)
1019 continue;
1020
1021 auto *leaves = b->GetListOfLeaves();
1022 if (b->IsA() != TBranch::Class() || leaves->GetEntries() != 1)
1023 continue; // this branch is not a variable-sized array, nothing to do
1024
1025 TLeaf *countLeaf = static_cast<TLeaf *>(leaves->At(0))->GetLeafCount();
1026 if (!countLeaf || IsStrInVec(countLeaf->GetName(), colsWithoutAliases))
1027 continue; // not a variable-sized array or the size branch is already there, nothing to do
1028
1029 // otherwise we must insert the size in colsWithoutAliases _and_ colsWithAliases
1030 colsWithoutAliases.insert(colsWithoutAliases.begin() + i, countLeaf->GetName());
1031 colsWithAliases.insert(colsWithAliases.begin() + i, countLeaf->GetName());
1032 ++nCols;
1033 ++i; // as we inserted an element in the vector we iterate over, we need to move the index forward one extra time
1034 }
1035
1036 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
1037}
1038
/// Remove repeated names from `columnNames` in place.
/// The first occurrence of every name is kept and the relative order of the kept names is unchanged.
void RemoveDuplicates(ColumnNames_t &columnNames)
{
   std::unordered_set<std::string> seen;
   seen.reserve(columnNames.size());

   // Explicit in-order compaction: the "already seen" bookkeeping relies on visiting elements front to back.
   auto keepEnd = columnNames.begin();
   for (auto it = columnNames.begin(); it != columnNames.end(); ++it) {
      if (!seen.insert(*it).second)
         continue; // an earlier occurrence was already kept
      if (keepEnd != it)
         *keepEnd = std::move(*it);
      ++keepEnd;
   }
   columnNames.erase(keepEnd, columnNames.end());
}
1047
/// Remove from `columnNames`, in place, every dotted name whose part before the first '.' is itself in the list.
/// Names without a '.' are always kept; dotted names whose leading part is not in the list are kept as well.
void RemoveRNTupleSubfields(ColumnNames_t &columnNames)
{
   // names without any '.', i.e. the candidates for being the parent of a dotted name
   std::unordered_set<std::string> parentFields;
   for (const auto &colName : columnNames) {
      if (colName.find('.') == std::string::npos)
         parentFields.insert(colName);
   }

   const auto hasSelectedParent = [&parentFields](const std::string &colName) {
      const auto firstDot = colName.find('.');
      if (firstDot == std::string::npos)
         return false;
      return parentFields.count(colName.substr(0, firstDot)) != 0;
   };

   columnNames.erase(std::remove_if(columnNames.begin(), columnNames.end(), hasSelectedParent), columnNames.end());
}
1065} // namespace RDF
1066} // namespace Internal
1067} // namespace ROOT
1068
1069namespace {
// Make sure the loop manager has column readers for data-source column `colName` read as type `typeID`.
// Returns without doing anything when the name is a Define or an alias in `colRegister`, or when the loop manager
// already has readers for this (column, type) pair. Otherwise one reader per slot is created through
// CreateColumnReader (with a null TTreeReader) and the whole set is handed over to the loop manager.
// NOTE(review): the line declaring the remaining parameters (`ds`, `colRegister`) is not part of this listing.
1070void AddDataSourceColumn(const std::string &colName, const std::type_info &typeID, ROOT::Detail::RDF::RLoopManager &lm,
1072{
1073
// Defines and aliases are not read from the data source.
1074 if (colRegister.IsDefineOrAlias(colName))
1075 return;
1076
// Readers for this column and type were registered before.
1077 if (lm.HasDataSourceColumnReaders(colName, typeID))
1078 return;
1079
// NOTE(review): the second operand of this condition is missing from this listing; only the HasColumn check is
// visible -- confirm the complete condition against the full source.
1080 if (!ds.HasColumn(colName) &&
1082 return;
1083
1084 const auto nSlots = lm.GetNSlots();
1085 std::vector<std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>> colReaders;
1086 colReaders.reserve(nSlots);
1087 // TODO consider changing the interface so we return all of these for all slots in one go
1088 for (auto slot = 0u; slot < nSlots; ++slot)
1089 colReaders.emplace_back(
1090 ROOT::Internal::RDF::CreateColumnReader(ds, slot, colName, typeID, /*treeReader*/ nullptr));
1091
// The readers are moved into the loop manager.
1092 lm.AddDataSourceColumnReaders(colName, std::move(colReaders), typeID);
1093}
1094} // namespace
1095
// Call AddDataSourceColumn once for every (colNames[i], *colTypeIDs[i]) pair, in order.
// The two containers must have the same size; this is checked with an assert only.
// NOTE(review): two lines of the parameter list (presumably declaring `ds` and `colRegister`, which the body uses)
// are not part of this listing.
1096void ROOT::Internal::RDF::AddDSColumns(const std::vector<std::string> &colNames, ROOT::Detail::RDF::RLoopManager &lm,
1098 const std::vector<const std::type_info *> &colTypeIDs,
1100{
1101 auto nCols = colNames.size();
1102 assert(nCols == colTypeIDs.size() && "Must provide exactly one column type for each column to create");
// The pointers in colTypeIDs are dereferenced without a null check.
1103 for (decltype(nCols) i{}; i < nCols; i++) {
1104 AddDataSourceColumn(colNames[i], *colTypeIDs[i], lm, ds, colRegister);
1105 }
1106}
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:252
char name[80]
Definition TGX11.cxx:148
Double_t err
#define gROOT
Definition TROOT.h:417
extern TVirtualMutex * gROOTMutex
Definition TROOT.h:63
#define R__LOCKGUARD(mutex)
#define free
Definition civetweb.c:1578
The head node of a RDF computation graph.
void RegisterJitHelperCall(const std::string &funcBody, std::unique_ptr< ROOT::Internal::RDF::RColumnRegister > colRegister, const std::vector< std::string > &colnames, std::shared_ptr< void > jittedNode, std::shared_ptr< void > argument=nullptr)
const std::set< std::string > & GetSuppressErrorsForMissingBranches() const
void AddDataSourceColumnReaders(std::string_view col, std::vector< std::unique_ptr< RColumnReaderBase > > &&readers, const std::type_info &ti)
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the RDataFrame.
bool HasDataSourceColumnReaders(std::string_view col, const std::type_info &ti) const
Return true if AddDataSourceColumnReaders was called for column name col.
A binder for user-defined columns, variations and aliases.
bool IsDefineOrAlias(std::string_view name) const
Check if the provided name is tracked in the names list.
bool IsAlias(std::string_view name) const
Return true if the given column name is an existing alias.
std::string_view ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
std::vector< std::string > GetVariationDeps(const std::string &column) const
Get the names of all variations that directly or indirectly affect a given column.
RDFDetail::RDefineBase * GetDefine(std::string_view colName) const
Return the RDefine for the requested column name, or nullptr.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual bool HasColumn(std::string_view colName) const =0
Checks if the dataset has a certain column.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
static TClass * Class()
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2994
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
virtual TLeaf * GetLeafCount() const
If this leaf stores a variable-sized array or a multi-dimensional array whose last dimension has vari...
Definition TLeaf.h:124
const char * GetName() const override
Returns name of object.
Definition TNamed.h:49
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Definition TPRegexp.h:78
A TTree represents a columnar dataset.
Definition TTree.h:89
virtual TBranch * FindBranch(const char *name)
Return the branch that corresponds to the path 'branchname', which can include the name of the tree or...
Definition TTree.cxx:4890
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
Definition TTree.cxx:5430
virtual TTree * GetTree() const
Definition TTree.h:604
TText * text
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister)
Book the jitting of a DefinePerSample call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
void RemoveDuplicates(ColumnNames_t &columnNames)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:191
void CheckSnapshotOptionsFormatCompatibility(const ROOT::RDF::RSnapshotOptions &opts)
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition RDFUtils.cxx:541
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > prevNode, std::string_view name, std::string_view expression, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
std::string JitBuildAction(const ColumnNames_t &cols, const std::type_info &helperArgType, const std::type_info &at, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, const bool vector2RVec)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
std::string DemangleTypeIdName(const std::type_info &typeInfo)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(ROOT::RDF::RDataSource &ds, unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *treeReader)
Definition RDFUtils.cxx:689
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(ROOT::RDF::RDataSource *ds, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:330
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
Definition Utils.hxx:272
void RemoveRNTupleSubfields(ColumnNames_t &columnNames)
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:483
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
Definition RDFUtils.cxx:434
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2RVec)
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister)
Book the jitting of a Define call.
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn, const std::string &varyColType)
Book the jitting of a Vary call.
std::vector< std::string > ColumnNames_t
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
BVH_ALWAYS_INLINE T dot(const Vec< T, N > &a, const Vec< T, N > &b)
Definition vec.h:98
A collection of options to steer the creation of the dataset on disk through Snapshot().
std::size_t fMaxUnzippedPageSize
(RNTuple only) Maximum allowed page size before compression
int fAutoFlush
(TTree only) AutoFlush value for output tree
ESnapshotOutputFormat fOutputFormat
Which data format to write to.
bool fEnableSamePageMerging
(RNTuple only) Enable identical-page deduplication. Requires page checksumming
std::size_t fInitialUnzippedPageSize
(RNTuple only) Initial page size before compression
bool fEnablePageChecksums
(RNTuple only) Enable checksumming for pages
std::size_t fApproxZippedClusterSize
(RNTuple only) Approximate target compressed cluster size
int fSplitLevel
(TTree only) Split level of output tree
std::size_t fMaxUnzippedClusterSize
(RNTuple only) Maximum uncompressed cluster size
int fBasketSize
(TTree only) Set a custom basket size option.