40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Woverloaded-virtual"
42#pragma GCC diagnostic ignored "-Wshadow"
46#pragma GCC diagnostic pop
60#include <unordered_map>
61#include <unordered_set>
74struct ParsedExpression {
85std::pair<ColumnNames_t, ColumnNames_t> FindUsedColsAndAliases(
const std::string &expr,
86 const ROOT::Internal::RDF::RColumnRegister &colRegister,
87 const ColumnNames_t &dataSourceColNames)
89 lexertk::generator tokens;
90 const auto tokensOk = tokens.process(expr);
92 const auto msg =
"Failed to tokenize expression:\n" + expr +
"\n\nMake sure it is valid C++.";
93 throw std::runtime_error(msg);
96 std::unordered_set<std::string> usedCols;
97 std::unordered_set<std::string> usedAliases;
100 const auto nTokens = tokens.size();
101 const auto kSymbol = lexertk::token::e_symbol;
102 for (
auto i = 0u; i < nTokens; ++i) {
103 const auto &tok = tokens[i];
105 if (tok.type != kSymbol || tok.value ==
"&" || tok.value ==
"|") {
113 auto dotChainKeepsGoing = [&](
unsigned int _i) {
114 return _i + 2 <= nTokens && tokens[_i + 1].value ==
"." && tokens[_i + 2].type == kSymbol;
116 while (dotChainKeepsGoing(i)) {
117 potentialColNames.emplace_back(potentialColNames.back() +
"." + tokens[i + 2].value);
123 const auto maybeAnAlias = potentialColNames[0];
124 const auto &resolvedAlias = colRegister.
ResolveAlias(maybeAnAlias);
125 if (resolvedAlias != maybeAnAlias) {
126 usedAliases.insert(maybeAnAlias);
127 for (
auto &s : potentialColNames)
128 s.replace(0, maybeAnAlias.size(), resolvedAlias);
133 auto isRDFColumn = [&](
const std::string &col) {
138 const auto longestRDFColMatch = std::find_if(potentialColNames.crbegin(), potentialColNames.crend(), isRDFColumn);
139 if (longestRDFColMatch != potentialColNames.crend())
140 usedCols.insert(*longestRDFColMatch);
143 return {{usedCols.begin(), usedCols.end()}, {usedAliases.begin(), usedAliases.end()}};
147std::string EscapeDots(
const std::string &s)
151 dot.Substitute(out,
"\\.",
"g");
152 return std::string(std::move(out));
155TString ResolveAliases(
const TString &expr,
const ColumnNames_t &usedAliases,
156 const ROOT::Internal::RDF::RColumnRegister &colRegister)
160 for (
const auto &alias : usedAliases) {
162 TPRegexp replacer(
"\\b" + EscapeDots(alias) +
"\\b");
163 replacer.Substitute(out, col.data(),
"g");
169ParsedExpression ParseRDFExpression(std::string_view expr,
const ROOT::Internal::RDF::RColumnRegister &colRegister,
170 const ColumnNames_t &dataSourceColNames)
173 TString preProcessedExpr(expr);
175 TPRegexp colSizeReplacer(
176 "(^|\\W)#(?!(ifdef|ifndef|if|else|elif|endif|pragma|define|undef|include|line))([a-zA-Z_][a-zA-Z0-9_]*)");
177 colSizeReplacer.Substitute(preProcessedExpr,
"$1R_rdf_sizeof_$3",
"g");
181 std::tie(usedCols, usedAliases) =
182 FindUsedColsAndAliases(std::string(preProcessedExpr), colRegister, dataSourceColNames);
184 const auto exprNoAliases = ResolveAliases(preProcessedExpr, usedAliases, colRegister);
188 TString exprWithVars(exprNoAliases);
191 for (
auto i = 0u; i < varNames.size(); ++i)
192 varNames[i] =
"var" + std::to_string(i);
199 std::sort(usedCols.begin(), usedCols.end(),
200 [](
const std::string &
a,
const std::string &
b) { return a.size() > b.size(); });
201 for (
const auto &col : usedCols) {
202 const auto varIdx = std::distance(usedCols.begin(), std::find(usedCols.begin(), usedCols.end(), col));
203 TPRegexp replacer(
"\\b" + EscapeDots(col) +
"\\b");
204 replacer.Substitute(exprWithVars, varNames[varIdx],
"g");
207 return ParsedExpression{std::string(std::move(exprWithVars)), std::move(usedCols), std::move(varNames)};
217std::unordered_map<std::string, std::string> &GetJittedExprs() {
218 static std::unordered_map<std::string, std::string> jittedExpressions;
219 return jittedExpressions;
222std::string BuildFunctionString(
const std::string &expr,
const ColumnNames_t &vars,
const ColumnNames_t &varTypes,
223 bool isSingleColumn =
false,
const std::string &varyColType =
"")
225 assert(vars.size() == varTypes.size());
227 TPRegexp re(R
"(\breturn\b)");
228 const bool hasReturnStmt = re.MatchB(expr);
230 static const std::vector<std::string> fundamentalTypes = {
263 std::stringstream ss;
265 for (
auto i = 0u; i < vars.size(); ++i) {
266 std::string fullType;
267 const auto &
type = varTypes[i];
268 if (std::find(fundamentalTypes.begin(), fundamentalTypes.end(), type) != fundamentalTypes.end()) {
270 fullType =
"const " +
type +
" ";
274 fullType =
type +
"& ";
276 ss << fullType << vars[i] <<
", ";
279 ss.seekp(-2, ss.cur);
285 auto finalizeExprForVary = [&]() {
286 std::string trailRetType{};
288 auto first_not_space = expr.find_first_not_of(
" \n\t");
289 auto last_not_space = expr.find_last_not_of(
" \n\t");
290 if (first_not_space != std::string::npos && last_not_space != std::string::npos && expr[first_not_space] ==
'{' &&
291 expr[last_not_space] ==
'}') {
296 trailRetType =
" -> ";
298 trailRetType +=
"ROOT::RVec<" + varyColType +
">";
300 trailRetType +=
"ROOT::RVec<ROOT::RVec<" + varyColType +
">>";
303 std::string trailRetToken{trailRetType.empty() ?
") {" :
')' + trailRetType +
'{'};
305 trailRetToken +=
" return ";
306 return trailRetToken;
309 if (!varyColType.empty())
310 ss << finalizeExprForVary();
312 ss << (hasReturnStmt ?
") {" :
") { return ");
315 ss << expr <<
"\n;}\n";
322std::string DeclareFunction(
const std::string &expr,
const ColumnNames_t &vars,
const ColumnNames_t &varTypes,
323 bool isSingleColumn =
false,
const std::string &varyColType =
"")
327 const auto funcCode = BuildFunctionString(expr, vars, varTypes, isSingleColumn, varyColType);
328 auto &exprMap = GetJittedExprs();
329 const auto exprIt = exprMap.find(funcCode);
330 if (exprIt != exprMap.end()) {
332 const auto funcName = exprIt->second;
337 const auto funcBaseName =
"func" + std::to_string(exprMap.size());
338 const auto funcFullName =
"R_rdf::" + funcBaseName;
340 const auto toDeclare =
"namespace R_rdf {\nauto " + funcBaseName + funcCode +
"\nusing " + funcBaseName +
341 "_ret_t = typename ROOT::TypeTraits::CallableTraits<decltype(" + funcBaseName +
346 exprMap.insert({funcCode, funcFullName});
353std::string RetTypeOfFunc(
const std::string &funcName)
355 const auto dt =
gROOT->GetType((funcName +
"_ret_t").c_str());
357 const auto type = dt->GetFullTypeName();
362ThrowJitBuildActionHelperTypeError(
const std::string &actionTypeNameBase,
const std::type_info &helperArgType)
366 std::string actionHelperTypeName = cname;
369 actionHelperTypeName = helperArgType.name();
371 std::string exceptionText =
372 "RDataFrame::Jit: cannot just-in-time compile a \"" + actionTypeNameBase +
"\" action using helper type \"" +
373 actionHelperTypeName +
374 "\". This typically happens in a custom `Fill` or `Book` invocation where the types of the input columns have "
375 "not been specified as template parameters and the ROOT interpreter has no knowledge of this type of action "
376 "helper. Please add template parameters for the types of the input columns to avoid jitting this action (i.e. "
377 "`df.Fill<float>(..., {\"x\"})`, where `float` is the type of `x`) or declare the action helper type to the "
378 "interpreter, e.g. via gInterpreter->Declare.";
380 throw std::runtime_error(exceptionText);
395 std::copy_if(columnNames.begin(), columnNames.end(), std::back_inserter(columnListWithoutSizeColumns),
396 [&](
const std::string &
name) {
397 if (name[0] ==
'#') {
398 filteredColumns.emplace_back(name);
405 if (!filteredColumns.empty()) {
406 std::string msg =
"Column name(s) {";
407 for (
auto &
c : filteredColumns)
409 msg[msg.size() - 2] =
'}';
410 msg +=
"will be ignored. Please go through a valid Alias to " + action +
" an array size column";
411 throw std::runtime_error(msg);
414 return columnListWithoutSizeColumns;
423 const char firstChar = var[0];
426 auto isALetter = [](
char c) {
return (
c >=
'A' &&
c <=
'Z') || (
c >=
'a' &&
c <=
'z'); };
427 const bool isValidFirstChar = firstChar ==
'_' || isALetter(firstChar);
428 if (!isValidFirstChar)
432 auto isANumber = [](
char c) {
return c >=
'0' &&
c <=
'9'; };
433 auto isValidTok = [&isALetter, &isANumber](
char c) {
return c ==
'_' || isALetter(
c) || isANumber(
c); };
434 for (
const char c : var)
439 const auto objName = where ==
"Define" ?
"column" :
"variation";
440 const auto error =
"RDataFrame::" + where +
": cannot define " + objName +
" \"" + std::string(var) +
441 "\". Not a valid C++ variable name.";
442 throw std::runtime_error(error);
450 std::string tname(tn);
458 const auto theRegexSize = columnNameRegexp.size();
459 std::string theRegex(columnNameRegexp);
461 const auto isEmptyRegex = 0 == theRegexSize;
463 if (theRegexSize > 0 && theRegex[0] !=
'^')
464 theRegex =
"^" + theRegex;
465 if (theRegexSize > 0 && theRegex[theRegexSize - 1] !=
'$')
466 theRegex = theRegex +
"$";
473 for (
auto &&colName : colNames) {
475 selectedColumns.emplace_back(colName);
479 if (selectedColumns.empty()) {
480 std::string
text(callerName);
481 if (columnNameRegexp.empty()) {
482 text =
": there is no column available to match.";
484 text =
": regex \"" + std::string(columnNameRegexp) +
"\" did not match any column.";
486 throw std::runtime_error(
text);
488 return selectedColumns;
497 if (colRegister.
IsAlias(definedColView))
498 error =
"An alias with that name, pointing to column \"" + std::string(colRegister.
ResolveAlias(definedColView)) +
499 "\", already exists in this branch of the computation graph.";
501 error =
"A column with that name has already been Define'd. Use Redefine to force redefinition.";
502 else if (std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedColView) != dataSourceColumns.end())
504 "A column with that name is already present in the input data source. Use Redefine to force redefinition.";
506 if (!error.empty()) {
507 error =
"RDataFrame::" + where +
": cannot define column \"" + std::string(definedColView) +
"\". " + error;
508 throw std::runtime_error(error);
518 if (colRegister.
IsAlias(definedColView)) {
519 error =
"An alias with that name, pointing to column \"" + std::string(colRegister.
ResolveAlias(definedColView)) +
520 "\", already exists. Aliases cannot be Redefined or Varied.";
524 const bool isAlreadyDefined = colRegister.
IsDefineOrAlias(definedColView);
525 const bool isADSColumn =
526 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedColView) != dataSourceColumns.end();
528 if (!isAlreadyDefined && !isADSColumn)
529 error =
"No column with that name was found in the dataset. Use Define to create a new column.";
532 if (!error.empty()) {
533 if (where ==
"DefaultValueFor")
534 error =
"RDataFrame::" + where +
": cannot provide default values for column \"" +
535 std::string(definedColView) +
"\". " + error;
537 error =
"RDataFrame::" + where +
": cannot redefine or vary column \"" + std::string(definedColView) +
"\". " +
539 throw std::runtime_error(error);
546 const std::string definedCol(definedColView);
548 if (!variationDeps.empty()) {
549 if (where ==
"Redefine") {
550 const std::string error =
"RDataFrame::" + where +
": cannot redefine column \"" + definedCol +
551 "\". The column depends on one or more systematic variations and re-defining varied "
552 "columns is not supported.";
553 throw std::runtime_error(error);
554 }
else if (where ==
"DefaultValueFor") {
555 const std::string error =
"RDataFrame::" + where +
": cannot provide a default value for column \"" +
557 "\". The column depends on one or more systematic variations and it should not be "
558 "possible to have missing values in varied columns.";
559 throw std::runtime_error(error);
561 const std::string error =
562 "RDataFrame::" + where +
": this operation cannot work with columns that depend on systematic variations.";
563 throw std::runtime_error(error);
570 if (nTemplateParams != nColumnNames) {
571 std::string err_msg =
"The number of template parameters specified is ";
572 err_msg += std::to_string(nTemplateParams);
573 err_msg +=
" while ";
574 err_msg += std::to_string(nColumnNames);
575 err_msg +=
" columns have been specified.";
576 throw std::runtime_error(err_msg);
586 if (defaultNames.size() < nRequiredNames)
587 throw std::runtime_error(
588 std::to_string(nRequiredNames) +
" column name" + (nRequiredNames == 1 ?
" is" :
"s are") +
589 " required but none were provided and the default list has size " + std::to_string(defaultNames.size()));
591 return ColumnNames_t(defaultNames.begin(), defaultNames.begin() + nRequiredNames);
594 if (names.size() != nRequiredNames) {
595 auto msg = std::to_string(nRequiredNames) +
" column name" + (nRequiredNames == 1 ?
" is" :
"s are") +
596 " required but " + std::to_string(names.size()) + (names.size() == 1 ?
" was" :
" were") +
598 for (
const auto &
name : names)
599 msg +=
" \"" +
name +
"\",";
601 throw std::runtime_error(msg);
611 for (
auto &column : requiredCols) {
614 const auto isDataSourceColumn =
615 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), column) != dataSourceColumns.end();
616 if (isDataSourceColumn)
618 unknownColumns.emplace_back(column);
620 return unknownColumns;
623std::vector<std::string>
GetFilterNames(
const std::shared_ptr<RLoopManager> &loopManager)
625 return loopManager->GetFiltersNames();
631 std::string_view dirName =
"";
632 std::string_view treeName = fullTreeName;
633 const auto lastSlash = fullTreeName.rfind(
'/');
634 if (std::string_view::npos != lastSlash) {
635 dirName = treeName.substr(0, lastSlash);
636 treeName = treeName.substr(lastSlash + 1, treeName.size());
638 return {std::string(treeName), std::string(dirName)};
645 s << std::hex << std::showbase << reinterpret_cast<size_t>(addr);
650std::shared_ptr<RDFDetail::RJittedFilter>
651BookFilterJit(std::shared_ptr<RDFDetail::RNodeBase> prevNode, std::string_view
name, std::string_view expression,
656 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
657 const auto exprVarTypes =
659 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
660 const auto type = RetTypeOfFunc(funcName);
662 throw std::runtime_error(
"Filter: the following expression does not evaluate to bool:\n" +
663 std::string(expression));
665 auto *lm = prevNode->GetLoopManagerUnchecked();
666 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(
670 std::stringstream filterInvocation;
671 filterInvocation <<
"(const std::vector<std::string> &colNames, "
672 <<
"ROOT::Internal::RDF::RColumnRegister &colRegister, "
673 <<
"ROOT::Detail::RDF::RLoopManager &lm, "
674 <<
"void *jittedFilter, "
675 <<
"std::shared_ptr<void> *) {\n";
676 filterInvocation <<
" ROOT::Internal::RDF::JitFilterHelper(" << funcName <<
", "
680 <<
"reinterpret_cast<ROOT::Detail::RDF::RJittedFilter*>(jittedFilter)"
682 lm->RegisterJitHelperCall(filterInvocation.str(),
683 std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister), parsedExpr.fUsedCols,
695 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
696 const auto exprVarTypes =
698 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
699 const auto type = RetTypeOfFunc(funcName);
701 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(
name,
type, lm, colRegister, parsedExpr.fUsedCols);
707 std::stringstream defineInvocation;
708 defineInvocation <<
"(const std::vector<std::string> &colNames, "
709 <<
"ROOT::Internal::RDF::RColumnRegister &colRegister, "
710 <<
"ROOT::Detail::RDF::RLoopManager &lm, "
711 <<
"void *jittedDefine, "
712 <<
"std::shared_ptr<void> *) {\n";
713 defineInvocation <<
" ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>("
718 <<
"reinterpret_cast<ROOT::Detail::RDF::RJittedDefine *>(jittedDefine)"
720 lm.
RegisterJitHelperCall(defineInvocation.str(), std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister),
721 parsedExpr.fUsedCols, jittedDefine);
730 const auto funcName = DeclareFunction(std::string(expression), {
"rdfslot_",
"rdfsampleinfo_"},
731 {
"unsigned int",
"const ROOT::RDF::RSampleInfo"});
732 const auto retType = RetTypeOfFunc(funcName);
734 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(
name, retType, lm, colRegister,
ColumnNames_t{});
740 std::stringstream defineInvocation;
741 defineInvocation <<
"(const std::vector<std::string> &colNames, "
742 <<
"ROOT::Internal::RDF::RColumnRegister &colRegister, "
743 <<
"ROOT::Detail::RDF::RLoopManager &lm, "
744 <<
"void *jittedDefine, "
745 <<
"std::shared_ptr<void> *) {\n";
746 defineInvocation <<
" ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>("
751 <<
"reinterpret_cast<ROOT::Detail::RDF::RJittedDefine *>(jittedDefine)"
753 lm.
RegisterJitHelperCall(defineInvocation.str(), std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister),
759std::shared_ptr<RJittedVariation>
761 const std::vector<std::string> &variationTags, std::string_view expression,
RLoopManager &lm,
763 const std::string &varyColType)
767 const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
768 const auto exprVarTypes =
770 const auto funcName =
771 DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes, isSingleColumn, varyColType);
772 const auto type = RetTypeOfFunc(funcName);
774 if (
type.rfind(
"ROOT::VecOps::RVec", 0) != 0) {
775 throw std::runtime_error(
776 "Jitted Vary expressions must return an RVec object. The following expression return type is '" +
type +
777 "' instead:\n" + parsedExpr.fExpr);
780 auto jittedVariation = std::make_shared<RJittedVariation>(colNames, variationName, variationTags,
type, colRegister,
781 lm, parsedExpr.fUsedCols);
790 std::stringstream varyInvocation;
791 varyInvocation <<
"(const std::vector<std::string> &inputColNames, "
792 <<
"ROOT::Internal::RDF::RColumnRegister &colRegister, "
793 <<
"ROOT::Detail::RDF::RLoopManager &lm, "
794 <<
"void *jittedVariation, "
795 <<
"std::shared_ptr<void> *helperArg) {\n";
797 <<
" auto *variedColNamesAndTags = reinterpret_cast<std::shared_ptr<std::pair<std::vector<std::string>, "
798 "std::vector<std::string>>> *>(helperArg);"
799 <<
" ROOT::Internal::RDF::JitVariationHelper<" << (isSingleColumn ?
"true" :
"false") <<
">(" << funcName
804 <<
"reinterpret_cast<ROOT::Internal::RDF::RJittedVariation *>(jittedVariation), "
805 <<
"(*variedColNamesAndTags)->first, "
806 <<
"(*variedColNamesAndTags)->second"
809 varyInvocation.str(), std::make_unique<ROOT::Internal::RDF::RColumnRegister>(colRegister), parsedExpr.fUsedCols,
811 std::make_shared<std::pair<std::vector<std::string>, std::vector<std::string>>>(colNames, variationTags));
812 return jittedVariation;
819 const bool vector2RVec)
823 if (!actionTypeClass) {
824 std::string exceptionText =
"An error occurred while inferring the action type of the operation.";
825 throw std::runtime_error(exceptionText);
827 const std::string actionTypeName = actionTypeClass->GetName();
828 const std::string actionTypeNameBase = actionTypeName.substr(actionTypeName.rfind(
':') + 1);
832 if (helperArgTypeName.empty()) {
833 ThrowJitBuildActionHelperTypeError(actionTypeNameBase, helperArgType);
838 std::stringstream createAction_str;
839 createAction_str <<
"(const std::vector<std::string> &colNames, "
840 <<
"ROOT::Internal::RDF::RColumnRegister &colRegister, "
841 <<
"ROOT::Detail::RDF::RLoopManager &lm, "
842 <<
"void *jittedAction, "
843 <<
"std::shared_ptr<void> *helperArg) {\n";
844 createAction_str <<
" ROOT::Internal::RDF::CallBuildAction<" << actionTypeName;
845 const auto columnTypeNames =
GetValidatedArgTypes(cols, colRegister, tree, ds, actionTypeNameBase, vector2RVec);
846 for (
auto &colType : columnTypeNames)
847 createAction_str <<
", " << colType;
848 createAction_str <<
">("
852 <<
"reinterpret_cast<ROOT::Internal::RDF::RJittedAction *>(jittedAction), " << nSlots <<
", "
853 <<
"reinterpret_cast<std::shared_ptr<" << helperArgTypeName <<
"> *>(helperArg)"
855 return createAction_str.str();
860 for (
const auto &s : strings) {
867std::shared_ptr<RNodeBase>
UpcastNode(std::shared_ptr<RNodeBase> ptr)
882 for (
auto &col : selectedColumns) {
889 if (!unknownColumns.empty()) {
894 std::set<std::string> intersection;
896 std::sort(unknownColumns.begin(), unknownColumns.end());
897 std::set_intersection(unknownColumns.cbegin(), unknownColumns.cend(), colsToIgnore.cbegin(), colsToIgnore.cend(),
898 std::inserter(intersection, intersection.begin()));
899 if (intersection.empty()) {
900 std::string errMsg = std::string(
"Unknown column") + (unknownColumns.size() > 1 ?
"s: " :
": ");
901 for (
auto &unknownColumn : unknownColumns)
902 errMsg +=
'"' + unknownColumn +
"\", ";
903 errMsg.resize(errMsg.size() - 2);
904 throw std::runtime_error(errMsg);
908 return selectedColumns;
915 auto toCheckedArgType = [&](
const std::string &
c) {
916 RDFDetail::RDefineBase *define = colRegister.
GetDefine(
c);
918 if (colType.rfind(
"CLING_UNKNOWN_TYPE", 0) == 0) {
920 "The type of custom column \"" +
c +
"\" (" + colType.substr(19) +
921 ") is not known to the interpreter, but a just-in-time-compiled " + context +
922 " call requires this column. Make sure to create and load ROOT dictionaries for this column's class.";
923 throw std::runtime_error(msg);
927 std::vector<std::string> colTypes;
928 colTypes.reserve(colNames.size());
929 std::transform(colNames.begin(), colNames.end(), std::back_inserter(colTypes), toCheckedArgType);
935 std::unordered_set<std::string> uniqueCols;
936 for (
auto &col : cols) {
937 if (!uniqueCols.insert(col).second) {
938 const auto msg =
"Error: column \"" + col +
939 "\" was passed to Snapshot twice. This is not supported: only one of the columns would be "
940 "readable with RDataFrame.";
941 throw std::logic_error(msg);
949 std::string optionName;
954 optionName =
"fApproxZippedClusterSize";
956 optionName =
"fMaxUnzippedClusterSize";
958 optionName =
"fInitialUnzippedPageSize";
960 optionName =
"fMaxUnzippedPageSize";
962 optionName =
"fEnablePageChecksums";
964 optionName =
"fEnableSamePageMerging";
967 if (!optionName.empty()) {
969 "The RNTuple-specific %s option in RSnapshotOptions has been set, but the output format is "
970 "set to TTree, so this option won't have any effect. Use the other options available in "
971 "RSnapshotOptions to "
972 "configure the output TTree. Alternatively, change fOutputFormat to snapshot to RNTuple instead.",
977 optionName =
"fAutoFlush";
979 optionName =
"fSplitLevel";
981 optionName =
"fBasketSize";
984 if (!optionName.empty()) {
987 "The TTree-specific %s option in RSnapshotOptions has been set, but the output format is set to RNTuple, "
988 "so this option won't have any effect. Use the fNTupleWriteOptions option available in RSnapshotOptions to "
989 "configure the output RNTuple. Alternatively, change fOutputFormat to snapshot to TTree instead.",
997std::pair<std::vector<std::string>, std::vector<std::string>>
999 std::vector<std::string> &&colsWithAliases)
1005 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
1007 assert(colsWithoutAliases.size() == colsWithAliases.size());
1009 auto nCols = colsWithoutAliases.size();
1011 for (std::size_t i = 0u; i < nCols; ++i) {
1012 const auto &colName = colsWithoutAliases[i];
1021 auto *leaves =
b->GetListOfLeaves();
1030 colsWithoutAliases.insert(colsWithoutAliases.begin() + i, countLeaf->
GetName());
1031 colsWithAliases.insert(colsWithAliases.begin() + i, countLeaf->
GetName());
1036 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
1041 std::set<std::string> uniqueCols;
1043 std::remove_if(columnNames.begin(), columnNames.end(),
1044 [&uniqueCols](
const std::string &colName) { return !uniqueCols.insert(colName).second; }),
1052 std::copy_if(columnNames.cbegin(), columnNames.cend(), std::back_inserter(parentFields),
1053 [](
const std::string &colName) { return colName.find(
'.') == std::string::npos; });
1055 columnNames.erase(std::remove_if(columnNames.begin(), columnNames.end(),
1056 [&parentFields](
const std::string &colName) {
1057 if (colName.find(
'.') == std::string::npos)
1059 const auto parentFieldName = colName.substr(0, colName.find_first_of(
'.'));
1060 return std::find(parentFields.cbegin(), parentFields.cend(), parentFieldName) !=
1085 std::vector<std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>> colReaders;
1086 colReaders.reserve(nSlots);
1088 for (
auto slot = 0u; slot < nSlots; ++slot)
1089 colReaders.emplace_back(
1098 const std::vector<const std::type_info *> &colTypeIDs,
1101 auto nCols = colNames.size();
1102 assert(nCols == colTypeIDs.size() &&
"Must provide exactly one column type for each column to create");
1103 for (
decltype(nCols) i{}; i < nCols; i++) {
1104 AddDataSourceColumn(colNames[i], *colTypeIDs[i], lm, ds, colRegister);
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
externTVirtualMutex * gROOTMutex
#define R__LOCKGUARD(mutex)
The head node of a RDF computation graph.
void RegisterJitHelperCall(const std::string &funcBody, std::unique_ptr< ROOT::Internal::RDF::RColumnRegister > colRegister, const std::vector< std::string > &colnames, std::shared_ptr< void > jittedNode, std::shared_ptr< void > argument=nullptr)
const std::set< std::string > & GetSuppressErrorsForMissingBranches() const
void AddDataSourceColumnReaders(std::string_view col, std::vector< std::unique_ptr< RColumnReaderBase > > &&readers, const std::type_info &ti)
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the RDataFrame.
unsigned int GetNSlots() const
bool HasDataSourceColumnReaders(std::string_view col, const std::type_info &ti) const
Return true if AddDataSourceColumnReaders was called for column name col.
A binder for user-defined columns, variations and aliases.
bool IsDefineOrAlias(std::string_view name) const
Check if the provided name is tracked in the names list.
bool IsAlias(std::string_view name) const
Return true if the given column name is an existing alias.
std::string_view ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
std::vector< std::string > GetVariationDeps(const std::string &column) const
Get the names of all variations that directly or indirectly affect a given column.
RDFDetail::RDefineBase * GetDefine(std::string_view colName) const
Return the RDefine for the requested column name, or nullptr.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual bool HasColumn(std::string_view colName) const =0
Checks if the dataset has a certain column.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
virtual TLeaf * GetLeafCount() const
If this leaf stores a variable-sized array or a multi-dimensional array whose last dimension has vari...
const char * GetName() const override
Returns name of object.
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
A TTree represents a columnar dataset.
virtual TBranch * FindBranch(const char *name)
Return the branch that correspond to the path 'branchname', which can include the name of the tree or...
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
virtual TTree * GetTree() const
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister)
Book the jitting of a DefinePerSample call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
void RemoveDuplicates(ColumnNames_t &columnNames)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
void CheckSnapshotOptionsFormatCompatibility(const ROOT::RDF::RSnapshotOptions &opts)
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > prevNode, std::string_view name, std::string_view expression, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
std::string JitBuildAction(const ColumnNames_t &cols, const std::type_info &helperArgType, const std::type_info &at, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, const bool vector2RVec)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
std::string DemangleTypeIdName(const std::type_info &typeInfo)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(ROOT::RDF::RDataSource &ds, unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *treeReader)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(ROOT::RDF::RDataSource *ds, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
void RemoveRNTupleSubfields(ColumnNames_t &columnNames)
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2RVec)
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister)
Book the jitting of a Define call.
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn, const std::string &varyColType)
Book the jitting of a Vary call.
std::vector< std::string > ColumnNames_t
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
BVH_ALWAYS_INLINE T dot(const Vec< T, N > &a, const Vec< T, N > &b)
A collection of options to steer the creation of the dataset on disk through Snapshot().
std::size_t fMaxUnzippedPageSize
(RNTuple only) Maximum allowed page size before compression
int fAutoFlush
(TTree only) AutoFlush value for output tree
ESnapshotOutputFormat fOutputFormat
Which data format to write to.
bool fEnableSamePageMerging
(RNTuple only) Enable identical-page deduplication. Requires page checksumming
std::size_t fInitialUnzippedPageSize
(RNTuple only) Initial page size before compression
bool fEnablePageChecksums
(RNTuple only) Enable checksumming for pages
std::size_t fApproxZippedClusterSize
(RNTuple only) Approximate target compressed cluster size
int fSplitLevel
(TTree only) Split level of output tree
std::size_t fMaxUnzippedClusterSize
(RNTuple only) Maximum uncompressed cluster size
int fBasketSize
(TTree only) Set a custom basket size option.