39#pragma GCC diagnostic push
40#pragma GCC diagnostic ignored "-Woverloaded-virtual"
41#pragma GCC diagnostic ignored "-Wshadow"
45#pragma GCC diagnostic pop
60#include <unordered_map>
61#include <unordered_set>
84struct ParsedExpression {
89 ColumnNames_t fUsedCols;
91 ColumnNames_t fVarNames;
95std::pair<ColumnNames_t, ColumnNames_t>
96FindUsedColsAndAliases(
const std::string &expr,
const ColumnNames_t &treeBranchNames,
99 lexertk::generator tokens;
100 const auto tokensOk = tokens.process(expr);
102 const auto msg =
"Failed to tokenize expression:\n" + expr +
"\n\nMake sure it is valid C++.";
103 throw std::runtime_error(msg);
106 std::unordered_set<std::string> usedCols;
107 std::unordered_set<std::string> usedAliases;
110 const auto nTokens = tokens.size();
111 const auto kSymbol = lexertk::token::e_symbol;
112 for (
auto i = 0u; i < nTokens; ++i) {
113 const auto &tok = tokens[i];
115 if (tok.type != kSymbol || tok.value ==
"&" || tok.value ==
"|") {
123 auto dotChainKeepsGoing = [&](
unsigned int _i) {
124 return _i + 2 <= nTokens && tokens[_i + 1].value ==
"." && tokens[_i + 2].type == kSymbol;
126 while (dotChainKeepsGoing(i)) {
127 potentialColNames.emplace_back(potentialColNames.back() +
"." + tokens[i + 2].value);
133 const auto maybeAnAlias = potentialColNames[0];
134 const auto &resolvedAlias = colRegister.
ResolveAlias(maybeAnAlias);
135 if (resolvedAlias != maybeAnAlias) {
136 usedAliases.insert(maybeAnAlias);
137 for (
auto &s : potentialColNames)
138 s.replace(0, maybeAnAlias.
size(), resolvedAlias);
143 auto isRDFColumn = [&](
const std::string &col) {
149 const auto longestRDFColMatch = std::find_if(potentialColNames.crbegin(), potentialColNames.crend(), isRDFColumn);
150 if (longestRDFColMatch != potentialColNames.crend())
151 usedCols.insert(*longestRDFColMatch);
154 return {{usedCols.begin(), usedCols.end()}, {usedAliases.begin(), usedAliases.end()}};
158std::string EscapeDots(
const std::string &s)
162 dot.Substitute(out,
"\\.",
"g");
163 return std::string(std::move(out));
166TString ResolveAliases(
const TString &expr,
const ColumnNames_t &usedAliases,
171 for (
const auto &alias : usedAliases) {
173 TPRegexp replacer(
"\\b" + EscapeDots(alias) +
"\\b");
174 replacer.Substitute(out, col.data(),
"g");
180ParsedExpression ParseRDFExpression(std::string_view expr,
const ColumnNames_t &treeBranchNames,
182 const ColumnNames_t &dataSourceColNames)
185 TString preProcessedExpr(expr);
188 "(^|\\W)#(?!(ifdef|ifndef|if|else|elif|endif|pragma|define|undef|include|line))([a-zA-Z_][a-zA-Z0-9_]*)");
189 colSizeReplacer.Substitute(preProcessedExpr,
"$1R_rdf_sizeof_$3",
"g");
193 std::tie(usedCols, usedAliases) =
194 FindUsedColsAndAliases(std::string(preProcessedExpr), treeBranchNames, colRegister, dataSourceColNames);
196 const auto exprNoAliases = ResolveAliases(preProcessedExpr, usedAliases, colRegister);
200 TString exprWithVars(exprNoAliases);
203 for (
auto i = 0u; i < varNames.size(); ++i)
204 varNames[i] =
"var" + std::to_string(i);
211 std::sort(usedCols.begin(), usedCols.end(),
212 [](
const std::string &
a,
const std::string &
b) { return a.size() > b.size(); });
213 for (
const auto &col : usedCols) {
214 const auto varIdx = std::distance(usedCols.begin(), std::find(usedCols.begin(), usedCols.end(), col));
215 TPRegexp replacer(
"\\b" + EscapeDots(col) +
"\\b");
216 replacer.Substitute(exprWithVars, varNames[varIdx],
"g");
219 return ParsedExpression{std::string(std::move(exprWithVars)), std::move(usedCols), std::move(varNames)};
229std::unordered_map<std::string, std::string> &GetJittedExprs() {
230 static std::unordered_map<std::string, std::string> jittedExpressions;
231 return jittedExpressions;
235BuildFunctionString(
const std::string &expr,
const ColumnNames_t &vars,
const ColumnNames_t &varTypes)
237 assert(vars.size() == varTypes.size());
240 const bool hasReturnStmt = re.MatchB(expr);
242 static const std::vector<std::string> fundamentalTypes = {
275 std::stringstream ss;
277 for (
auto i = 0u; i < vars.size(); ++i) {
278 std::string fullType;
279 const auto &
type = varTypes[i];
280 if (std::find(fundamentalTypes.begin(), fundamentalTypes.end(),
type) != fundamentalTypes.end()) {
282 fullType =
"const " +
type +
" ";
286 fullType =
type +
"& ";
288 ss << fullType << vars[i] <<
", ";
291 ss.seekp(-2, ss.cur);
297 ss << expr <<
"\n;}";
304std::string DeclareFunction(
const std::string &expr,
const ColumnNames_t &vars,
const ColumnNames_t &varTypes)
308 const auto funcCode = BuildFunctionString(expr, vars, varTypes);
309 auto &exprMap = GetJittedExprs();
310 const auto exprIt = exprMap.find(funcCode);
311 if (exprIt != exprMap.end()) {
313 const auto funcName = exprIt->second;
318 const auto funcBaseName =
"func" + std::to_string(exprMap.size());
319 const auto funcFullName =
"R_rdf::" + funcBaseName;
321 const auto toDeclare =
"namespace R_rdf {\nauto " + funcBaseName + funcCode +
"\nusing " + funcBaseName +
322 "_ret_t = typename ROOT::TypeTraits::CallableTraits<decltype(" + funcBaseName +
327 exprMap.insert({funcCode, funcFullName});
334std::string RetTypeOfFunc(
const std::string &funcName)
336 const auto dt =
gROOT->GetType((funcName +
"_ret_t").c_str());
338 const auto type = dt->GetFullTypeName();
343ThrowJitBuildActionHelperTypeError(
const std::string &actionTypeNameBase,
const std::type_info &helperArgType)
347 std::string actionHelperTypeName =
cname;
350 actionHelperTypeName = helperArgType.name();
352 std::string exceptionText =
353 "RDataFrame::Jit: cannot just-in-time compile a \"" + actionTypeNameBase +
"\" action using helper type \"" +
354 actionHelperTypeName +
355 "\". This typically happens in a custom `Fill` or `Book` invocation where the types of the input columns have "
356 "not been specified as template parameters and the ROOT interpreter has no knowledge of this type of action "
357 "helper. Please add template parameters for the types of the input columns to avoid jitting this action (i.e. "
358 "`df.Fill<float>(..., {\"x\"})`, where `float` is the type of `x`) or declare the action helper type to the "
359 "interpreter, e.g. via gInterpreter->Declare.";
361 throw std::runtime_error(exceptionText);
376 std::copy_if(columnNames.begin(), columnNames.end(), std::back_inserter(columnListWithoutSizeColumns),
377 [&](
const std::string &
name) {
378 if (name[0] ==
'#') {
379 filteredColumns.emplace_back(name);
386 if (!filteredColumns.empty()) {
387 std::string msg =
"Column name(s) {";
388 for (
auto &
c : filteredColumns)
390 msg[msg.size() - 2] =
'}';
391 msg +=
"will be ignored. Please go through a valid Alias to " + action +
" an array size column";
392 throw std::runtime_error(msg);
395 return columnListWithoutSizeColumns;
398std::string
ResolveAlias(
const std::string &col,
const std::map<std::string, std::string> &aliasMap)
400 const auto it = aliasMap.find(col);
401 if (it != aliasMap.end())
405 if (col.size() > 1 && col[0] ==
'#')
406 return "R_rdf_sizeof_" + col.substr(1);
417 const char firstChar = var[0];
420 auto isALetter = [](
char c) {
return (
c >=
'A' &&
c <=
'Z') || (
c >=
'a' &&
c <=
'z'); };
421 const bool isValidFirstChar = firstChar ==
'_' || isALetter(firstChar);
422 if (!isValidFirstChar)
426 auto isANumber = [](
char c) {
return c >=
'0' &&
c <=
'9'; };
427 auto isValidTok = [&isALetter, &isANumber](
char c) {
return c ==
'_' || isALetter(
c) || isANumber(
c); };
428 for (
const char c : var)
433 const auto objName = where ==
"Define" ?
"column" :
"variation";
434 const auto error =
"RDataFrame::" + where +
": cannot define " + objName +
" \"" + std::string(var) +
435 "\". Not a valid C++ variable name.";
436 throw std::runtime_error(error);
440std::string DemangleTypeIdName(
const std::type_info &typeInfo)
444 std::string tname(tn);
452 const auto theRegexSize = columnNameRegexp.size();
453 std::string theRegex(columnNameRegexp);
455 const auto isEmptyRegex = 0 == theRegexSize;
457 if (theRegexSize > 0 && theRegex[0] !=
'^')
458 theRegex =
"^" + theRegex;
459 if (theRegexSize > 0 && theRegex[theRegexSize - 1] !=
'$')
460 theRegex = theRegex +
"$";
467 for (
auto &&colName : colNames) {
469 selectedColumns.emplace_back(colName);
473 if (selectedColumns.empty()) {
474 std::string
text(callerName);
475 if (columnNameRegexp.empty()) {
476 text =
": there is no column available to match.";
478 text =
": regex \"" + std::string(columnNameRegexp) +
"\" did not match any column.";
480 throw std::runtime_error(
text);
482 return selectedColumns;
491 if (colRegister.
IsAlias(definedColView))
492 error =
"An alias with that name, pointing to column \"" + std::string(colRegister.
ResolveAlias(definedColView)) +
493 "\", already exists in this branch of the computation graph.";
495 error =
"A column with that name has already been Define'd. Use Redefine to force redefinition.";
499 else if (std::find(treeColumns.begin(), treeColumns.end(), definedColView) != treeColumns.end())
501 "A branch with that name is already present in the input TTree/TChain. Use Redefine to force redefinition.";
502 else if (std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedColView) != dataSourceColumns.end())
504 "A column with that name is already present in the input data source. Use Redefine to force redefinition.";
506 if (!error.empty()) {
507 error =
"RDataFrame::" + where +
": cannot define column \"" + std::string(definedColView) +
"\". " + error;
508 throw std::runtime_error(error);
518 if (colRegister.
IsAlias(definedColView)) {
519 error =
"An alias with that name, pointing to column \"" + std::string(colRegister.
ResolveAlias(definedColView)) +
520 "\", already exists. Aliases cannot be Redefined or Varied.";
524 const bool isAlreadyDefined = colRegister.
IsDefineOrAlias(definedColView);
528 const bool isABranch = std::find(treeColumns.begin(), treeColumns.end(), definedColView) != treeColumns.end();
529 const bool isADSColumn =
530 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedColView) != dataSourceColumns.end();
532 if (!isAlreadyDefined && !isABranch && !isADSColumn)
533 error =
"No column with that name was found in the dataset. Use Define to create a new column.";
536 if (!error.empty()) {
537 if (where ==
"DefaultValueFor")
538 error =
"RDataFrame::" + where +
": cannot provide default values for column \"" +
539 std::string(definedColView) +
"\". " + error;
541 error =
"RDataFrame::" + where +
": cannot redefine or vary column \"" + std::string(definedColView) +
"\". " +
543 throw std::runtime_error(error);
550 const std::string definedCol(definedColView);
552 if (!variationDeps.empty()) {
553 if (where ==
"Redefine") {
554 const std::string error =
"RDataFrame::" + where +
": cannot redefine column \"" + definedCol +
555 "\". The column depends on one or more systematic variations and re-defining varied "
556 "columns is not supported.";
557 throw std::runtime_error(error);
558 }
else if (where ==
"DefaultValueFor") {
559 const std::string error =
"RDataFrame::" + where +
": cannot provide a default value for column \"" +
561 "\". The column depends on one or more systematic variations and it should not be "
562 "possible to have missing values in varied columns.";
563 throw std::runtime_error(error);
565 const std::string error =
566 "RDataFrame::" + where +
": this operation cannot work with columns that depend on systematic variations.";
567 throw std::runtime_error(error);
574 if (nTemplateParams != nColumnNames) {
575 std::string err_msg =
"The number of template parameters specified is ";
576 err_msg += std::to_string(nTemplateParams);
577 err_msg +=
" while ";
578 err_msg += std::to_string(nColumnNames);
579 err_msg +=
" columns have been specified.";
580 throw std::runtime_error(err_msg);
590 if (defaultNames.size() < nRequiredNames)
591 throw std::runtime_error(
592 std::to_string(nRequiredNames) +
" column name" + (nRequiredNames == 1 ?
" is" :
"s are") +
593 " required but none were provided and the default list has size " + std::to_string(defaultNames.size()));
595 return ColumnNames_t(defaultNames.begin(), defaultNames.begin() + nRequiredNames);
598 if (names.size() != nRequiredNames) {
599 auto msg = std::to_string(nRequiredNames) +
" column name" + (nRequiredNames == 1 ?
" is" :
"s are") +
600 " required but " + std::to_string(names.size()) + (names.size() == 1 ?
" was" :
" were") +
602 for (
const auto &
name : names)
603 msg +=
" \"" +
name +
"\",";
605 throw std::runtime_error(msg);
615 for (
auto &column : requiredCols) {
616 const auto isBranch = std::find(datasetColumns.begin(), datasetColumns.end(), column) != datasetColumns.end();
621 const auto isDataSourceColumn =
622 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), column) != dataSourceColumns.end();
623 if (isDataSourceColumn)
625 unknownColumns.emplace_back(column);
627 return unknownColumns;
630std::vector<std::string>
GetFilterNames(
const std::shared_ptr<RLoopManager> &loopManager)
632 return loopManager->GetFiltersNames();
638 std::string_view dirName =
"";
639 std::string_view treeName = fullTreeName;
640 const auto lastSlash = fullTreeName.rfind(
'/');
641 if (std::string_view::npos != lastSlash) {
642 dirName = treeName.substr(0, lastSlash);
643 treeName = treeName.substr(lastSlash + 1, treeName.size());
645 return {std::string(treeName), std::string(dirName)};
652 s << std::hex << std::showbase << reinterpret_cast<size_t>(addr);
657std::shared_ptr<RDFDetail::RJittedFilter>
658BookFilterJit(std::shared_ptr<RDFDetail::RNodeBase> *prevNodeOnHeap, std::string_view
name, std::string_view expression,
663 const auto parsedExpr = ParseRDFExpression(expression, branches, colRegister, dsColumns);
664 const auto exprVarTypes =
666 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
667 const auto type = RetTypeOfFunc(funcName);
669 std::runtime_error(
"Filter: the following expression does not evaluate to bool:\n" + std::string(expression));
676 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(
677 (*prevNodeOnHeap)->GetLoopManagerUnchecked(),
name,
682 std::stringstream filterInvocation;
683 filterInvocation <<
"ROOT::Internal::RDF::JitFilterHelper(" << funcName <<
", new const char*["
684 << parsedExpr.fUsedCols.size() <<
"]{";
685 for (
const auto &col : parsedExpr.fUsedCols)
686 filterInvocation <<
"\"" << col <<
"\", ";
687 if (!parsedExpr.fUsedCols.empty())
688 filterInvocation.seekp(-2, filterInvocation.cur);
693 filterInvocation <<
"}, " << parsedExpr.fUsedCols.size() <<
", \"" <<
name <<
"\", "
694 <<
"reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedFilter>*>("
696 <<
"reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" << prevNodeAddr <<
"),"
697 <<
"reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesOnHeapAddr <<
")"
700 auto lm = jittedFilter->GetLoopManagerUnchecked();
701 lm->ToJitExec(filterInvocation.str());
710 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
712 auto *
const tree = lm.
GetTree();
715 const auto parsedExpr = ParseRDFExpression(expression, branches, colRegister, dsColumns);
716 const auto exprVarTypes =
718 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
719 const auto type = RetTypeOfFunc(funcName);
723 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(
name,
type, lm, colRegister, parsedExpr.fUsedCols);
725 std::stringstream defineInvocation;
726 defineInvocation <<
"ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>(" << funcName
727 <<
", new const char*[" << parsedExpr.fUsedCols.size() <<
"]{";
728 for (
const auto &col : parsedExpr.fUsedCols) {
729 defineInvocation <<
"\"" << col <<
"\", ";
731 if (!parsedExpr.fUsedCols.empty())
732 defineInvocation.seekp(-2, defineInvocation.cur);
737 defineInvocation <<
"}, " << parsedExpr.fUsedCols.size() <<
", \"" <<
name
738 <<
"\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" <<
PrettyPrintAddr(&lm)
739 <<
"), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>("
741 <<
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
742 <<
"), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
752 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
754 const auto funcName = DeclareFunction(std::string(expression), {
"rdfslot_",
"rdfsampleinfo_"},
755 {
"unsigned int",
"const ROOT::RDF::RSampleInfo"});
756 const auto retType = RetTypeOfFunc(funcName);
760 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(
name, retType, lm, colRegister,
ColumnNames_t{});
762 std::stringstream defineInvocation;
763 defineInvocation <<
"ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>("
764 << funcName <<
", nullptr, 0, ";
769 defineInvocation <<
"\"" <<
name <<
"\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" <<
PrettyPrintAddr(&lm)
770 <<
"), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>("
772 <<
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
773 <<
"), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
781std::shared_ptr<RJittedVariation>
783 const std::vector<std::string> &variationTags, std::string_view expression,
RLoopManager &lm,
785 std::shared_ptr<RNodeBase> *upcastNodeOnHeap,
bool isSingleColumn)
787 auto *
const tree = lm.
GetTree();
790 const auto parsedExpr = ParseRDFExpression(expression, branches, colRegister, dsColumns);
791 const auto exprVarTypes =
793 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
794 const auto type = RetTypeOfFunc(funcName);
796 if (
type.rfind(
"ROOT::VecOps::RVec", 0) != 0) {
798 delete upcastNodeOnHeap;
799 upcastNodeOnHeap =
nullptr;
800 throw std::runtime_error(
801 "Jitted Vary expressions must return an RVec object. The following expression returns a " +
type +
802 " instead:\n" + parsedExpr.fExpr);
807 auto jittedVariation = std::make_shared<RJittedVariation>(colNames, variationName, variationTags,
type, colRegister,
808 lm, parsedExpr.fUsedCols);
816 std::stringstream varyInvocation;
817 varyInvocation <<
"ROOT::Internal::RDF::JitVariationHelper<" << (isSingleColumn ?
"true" :
"false") <<
">("
818 << funcName <<
", new const char*[" << parsedExpr.fUsedCols.size() <<
"]{";
819 for (
const auto &col : parsedExpr.fUsedCols) {
820 varyInvocation <<
"\"" << col <<
"\", ";
822 if (!parsedExpr.fUsedCols.empty())
823 varyInvocation.seekp(-2, varyInvocation.cur);
824 varyInvocation <<
"}, " << parsedExpr.fUsedCols.size();
825 varyInvocation <<
", new const char*[" << colNames.size() <<
"]{";
826 for (
const auto &col : colNames) {
827 varyInvocation <<
"\"" << col <<
"\", ";
829 varyInvocation.seekp(-2, varyInvocation.cur);
830 varyInvocation <<
"}, " << colNames.size() <<
", new const char*[" << variationTags.size() <<
"]{";
831 for (
const auto &tag : variationTags) {
832 varyInvocation <<
"\"" << tag <<
"\", ";
834 varyInvocation.seekp(-2, varyInvocation.cur);
835 varyInvocation <<
"}, " << variationTags.size() <<
", \"" << variationName
836 <<
"\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" <<
PrettyPrintAddr(&lm)
837 <<
"), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedVariation>*>("
839 <<
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << colRegisterAddr
840 <<
"), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
844 return jittedVariation;
850 const std::type_info &helperArgType,
const std::type_info &at,
void *helperArgOnHeap,
852 std::weak_ptr<RJittedAction> *jittedActionOnHeap,
const bool vector2RVec)
856 if (!actionTypeClass) {
857 std::string exceptionText =
"An error occurred while inferring the action type of the operation.";
858 throw std::runtime_error(exceptionText);
860 const std::string actionTypeName = actionTypeClass->GetName();
861 const std::string actionTypeNameBase = actionTypeName.substr(actionTypeName.rfind(
':') + 1);
865 if (helperArgTypeName.empty()) {
866 ThrowJitBuildActionHelperTypeError(actionTypeNameBase, helperArgType);
874 std::stringstream createAction_str;
875 createAction_str <<
"ROOT::Internal::RDF::CallBuildAction<" << actionTypeName;
876 const auto columnTypeNames =
GetValidatedArgTypes(cols, colRegister, tree, ds, actionTypeNameBase, vector2RVec);
877 for (
auto &colType : columnTypeNames)
878 createAction_str <<
", " << colType;
881 createAction_str <<
">(reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
882 <<
PrettyPrintAddr(prevNode) <<
"), new const char*[" << cols.size() <<
"]{";
883 for (
auto i = 0u; i < cols.size(); ++i) {
885 createAction_str <<
", ";
886 createAction_str <<
'"' << cols[i] <<
'"';
888 createAction_str <<
"}, " << cols.size() <<
", " << nSlots <<
", reinterpret_cast<shared_ptr<" << helperArgTypeName
890 <<
"), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedAction>*>("
892 <<
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr <<
"));";
893 return createAction_str.str();
898 for (
const auto &s : strings) {
905std::shared_ptr<RNodeBase>
UpcastNode(std::shared_ptr<RNodeBase> ptr)
920 for (
auto &col : selectedColumns) {
928 if (!unknownColumns.empty()) {
933 std::set<std::string> intersection;
935 std::sort(unknownColumns.begin(), unknownColumns.end());
936 std::sort(colsToIgnore.begin(), colsToIgnore.end());
937 std::set_intersection(unknownColumns.begin(), unknownColumns.end(), colsToIgnore.begin(), colsToIgnore.end(),
938 std::inserter(intersection, intersection.begin()));
939 if (intersection.empty()) {
940 std::string errMsg = std::string(
"Unknown column") + (unknownColumns.size() > 1 ?
"s: " :
": ");
941 for (
auto &unknownColumn : unknownColumns)
942 errMsg +=
'"' + unknownColumn +
"\", ";
943 errMsg.resize(errMsg.size() - 2);
944 throw std::runtime_error(errMsg);
948 return selectedColumns;
955 auto toCheckedArgType = [&](
const std::string &
c) {
958 if (colType.rfind(
"CLING_UNKNOWN_TYPE", 0) == 0) {
960 "The type of custom column \"" +
c +
"\" (" + colType.substr(19) +
961 ") is not known to the interpreter, but a just-in-time-compiled " + context +
962 " call requires this column. Make sure to create and load ROOT dictionaries for this column's class.";
963 throw std::runtime_error(msg);
967 std::vector<std::string> colTypes;
968 colTypes.reserve(colNames.size());
969 std::transform(colNames.begin(), colNames.end(), std::back_inserter(colTypes), toCheckedArgType);
978 const auto nColumns = requestedCols.size();
979 std::vector<bool> mustBeDefined(nColumns,
false);
980 for (
auto i = 0u; i < nColumns; ++i)
981 mustBeDefined[i] = std::find(definedCols.begin(), definedCols.end(), requestedCols[i]) == definedCols.end();
982 return mustBeDefined;
987 std::unordered_set<std::string> uniqueCols;
988 for (
auto &col : cols) {
989 if (!uniqueCols.insert(col).second) {
990 const auto msg =
"Error: column \"" + col +
991 "\" was passed to Snapshot twice. This is not supported: only one of the columns would be "
992 "readable with RDataFrame.";
993 throw std::logic_error(msg);
1000std::pair<std::vector<std::string>, std::vector<std::string>>
1001AddSizeBranches(
const std::vector<std::string> &branches,
TTree *tree, std::vector<std::string> &&colsWithoutAliases,
1002 std::vector<std::string> &&colsWithAliases)
1005 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
1007 assert(colsWithoutAliases.size() == colsWithAliases.size());
1009 auto nCols = colsWithoutAliases.size();
1011 for (std::size_t i = 0u; i < nCols; ++i) {
1012 const auto &colName = colsWithoutAliases[i];
1016 auto *
b = tree->GetBranch(colName.c_str());
1018 b = tree->FindBranch(colName.c_str());
1019 assert(
b !=
nullptr);
1020 auto *leaves =
b->GetListOfLeaves();
1024 TLeaf *countLeaf =
static_cast<TLeaf *
>(leaves->At(0))->GetLeafCount();
1029 colsWithoutAliases.insert(colsWithoutAliases.begin() + i, countLeaf->
GetName());
1030 colsWithAliases.insert(colsWithAliases.begin() + i, countLeaf->
GetName());
1035 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
1040 std::set<std::string> uniqueCols;
1042 std::remove_if(columnNames.begin(), columnNames.end(),
1043 [&uniqueCols](
const std::string &colName) { return !uniqueCols.insert(colName).second; }),
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char cname
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char text
R__EXTERN TVirtualMutex * gROOTMutex
#define R__LOCKGUARD(mutex)
The head node of a RDF computation graph.
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
std::vector< std::string > & GetSuppressErrorsForMissingBranches()
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the RDataFrame.
A binder for user-defined columns, variations and aliases.
bool IsDefineOrAlias(std::string_view name) const
Check if the provided name is tracked in the names list.
bool IsAlias(std::string_view name) const
Return true if the given column name is an existing alias.
std::string_view ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
std::vector< std::string > GetVariationDeps(const std::string &column) const
Get the names of all variations that directly or indirectly affect a given column.
RDFDetail::RDefineBase * GetDefine(std::string_view colName) const
Return the RDefine for the requested column name, or nullptr.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
A TLeaf describes individual elements of a TBranch. See TBranch structure in TTree.
const char * GetName() const override
Returns name of object.
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
A TTree represents a columnar dataset.
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
void RemoveDuplicates(ColumnNames_t &columnNames)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
std::string ResolveAlias(const std::string &col, const std::map< std::string, std::string > &aliasMap)
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting with '#' filtered out.
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap, bool isSingleColumn)
Book the jitting of a Vary call.
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2RVec)
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(const std::vector< std::string > &branches, TTree *tree, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::vector< bool > FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedCols)
Return a bitset each element of which indicates whether the corresponding element in selectedColumns ...
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap, const bool vector2RVec)
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
std::vector< std::string > ColumnNames_t
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
BVH_ALWAYS_INLINE T dot(const Vec< T, N > &a, const Vec< T, N > &b)