39#pragma GCC diagnostic push 
   40#pragma GCC diagnostic ignored "-Woverloaded-virtual" 
   41#pragma GCC diagnostic ignored "-Wshadow" 
   45#pragma GCC diagnostic pop 
   60#include <unordered_map> 
   61#include <unordered_set> 
   84struct ParsedExpression {
 
   89   ColumnNames_t fUsedCols;
 
   91   ColumnNames_t fVarNames;
 
   95static std::pair<ColumnNames_t, ColumnNames_t>
 
   96FindUsedColsAndAliases(
const std::string &expr, 
const ColumnNames_t &treeBranchNames,
 
   99   lexertk::generator tokens;
 
  100   const auto tokensOk = tokens.process(expr);
 
  102      const auto msg = 
"Failed to tokenize expression:\n" + expr + 
"\n\nMake sure it is valid C++.";
 
  103      throw std::runtime_error(msg);
 
  106   std::unordered_set<std::string> usedCols;
 
  107   std::unordered_set<std::string> usedAliases;
 
  110   const auto nTokens = tokens.size();
 
  111   const auto kSymbol = lexertk::token::e_symbol;
 
  112   for (
auto i = 0u; i < nTokens; ++i) {
 
  113      const auto &tok = tokens[i];
 
  115      if (tok.type != kSymbol || tok.value == 
"&" || tok.value == 
"|") {
 
  123      auto dotChainKeepsGoing = [&](
unsigned int _i) {
 
  124         return _i + 2 <= nTokens && tokens[_i + 1].value == 
"." && tokens[_i + 2].type == kSymbol;
 
  126      while (dotChainKeepsGoing(i)) {
 
  127         potentialColNames.emplace_back(potentialColNames.back() + 
"." + tokens[i + 2].value);
 
  133      const auto maybeAnAlias = potentialColNames[0]; 
 
  134      const auto &resolvedAlias = colRegister.
ResolveAlias(maybeAnAlias);
 
  135      if (resolvedAlias != maybeAnAlias) { 
 
  136         usedAliases.insert(maybeAnAlias);
 
  137         for (
auto &s : potentialColNames)
 
  138            s.replace(0, maybeAnAlias.
size(), resolvedAlias);
 
  143      auto isRDFColumn = [&](
const std::string &col) {
 
  149      const auto longestRDFColMatch = std::find_if(potentialColNames.crbegin(), potentialColNames.crend(), isRDFColumn);
 
  150      if (longestRDFColMatch != potentialColNames.crend())
 
  151         usedCols.insert(*longestRDFColMatch);
 
  154   return {{usedCols.begin(), usedCols.end()}, {usedAliases.begin(), usedAliases.end()}};
 
  158static std::string EscapeDots(
const std::string &s)
 
  162   dot.Substitute(out, 
"\\.", 
"g");
 
  163   return std::string(std::move(out));
 
  166static TString ResolveAliases(
const TString &expr, 
const ColumnNames_t &usedAliases,
 
  171   for (
const auto &alias : usedAliases) {
 
  173      TPRegexp replacer(
"\\b" + EscapeDots(alias) + 
"\\b");
 
  174      replacer.Substitute(out, col, 
"g");
 
  180static ParsedExpression ParseRDFExpression(std::string_view expr, 
const ColumnNames_t &treeBranchNames,
 
  182                                           const ColumnNames_t &dataSourceColNames)
 
  185   TString preProcessedExpr(expr);
 
  188      "(^|\\W)#(?!(ifdef|ifndef|if|else|elif|endif|pragma|define|undef|include|line))([a-zA-Z_][a-zA-Z0-9_]*)");
 
  189   colSizeReplacer.Substitute(preProcessedExpr, 
"$1R_rdf_sizeof_$3", 
"g");
 
  193   std::tie(usedCols, usedAliases) =
 
  194      FindUsedColsAndAliases(std::string(preProcessedExpr), treeBranchNames, colRegister, dataSourceColNames);
 
  196   const auto exprNoAliases = ResolveAliases(preProcessedExpr, usedAliases, colRegister);
 
  200   TString exprWithVars(exprNoAliases);
 
  203   for (
auto i = 0u; i < varNames.size(); ++i)
 
  204      varNames[i] = 
"var" + std::to_string(i);
 
  211   std::sort(usedCols.begin(), usedCols.end(),
 
  212             [](
const std::string &
a, 
const std::string &
b) { return a.size() > b.size(); });
 
  213   for (
const auto &col : usedCols) {
 
  214      const auto varIdx = std::distance(usedCols.begin(), std::find(usedCols.begin(), usedCols.end(), col));
 
  215      TPRegexp replacer(
"\\b" + EscapeDots(col) + 
"\\b");
 
  216      replacer.Substitute(exprWithVars, varNames[varIdx], 
"g");
 
  219   return ParsedExpression{std::string(std::move(exprWithVars)), std::move(usedCols), std::move(varNames)};
 
  229static std::unordered_map<std::string, std::string> &GetJittedExprs() {
 
  230   static std::unordered_map<std::string, std::string> jittedExpressions;
 
  231   return jittedExpressions;
 
  235BuildFunctionString(
const std::string &expr, 
const ColumnNames_t &vars, 
const ColumnNames_t &varTypes)
 
  237   assert(vars.size() == varTypes.size());
 
  240   const bool hasReturnStmt = re.MatchB(expr);
 
  242   static const std::vector<std::string> fundamentalTypes = {
 
  275   std::stringstream ss;
 
  277   for (
auto i = 0u; i < vars.size(); ++i) {
 
  278      std::string fullType;
 
  279      const auto &
type = varTypes[i];
 
  280      if (std::find(fundamentalTypes.begin(), fundamentalTypes.end(), 
type) != fundamentalTypes.end()) {
 
  282         fullType = 
"const " + 
type + 
" ";
 
  286         fullType = 
type + 
"& ";
 
  288      ss << fullType << vars[i] << 
", ";
 
  291      ss.seekp(-2, ss.cur);
 
  297   ss << expr << 
"\n;}";
 
  304static std::string DeclareFunction(
const std::string &expr, 
const ColumnNames_t &vars, 
const ColumnNames_t &varTypes)
 
  308   const auto funcCode = BuildFunctionString(expr, vars, varTypes);
 
  309   auto &exprMap = GetJittedExprs();
 
  310   const auto exprIt = exprMap.find(funcCode);
 
  311   if (exprIt != exprMap.end()) {
 
  313      const auto funcName = exprIt->second;
 
  318   const auto funcBaseName = 
"func" + std::to_string(exprMap.size());
 
  319   const auto funcFullName = 
"R_rdf::" + funcBaseName;
 
  321   const auto toDeclare = 
"namespace R_rdf {\nauto " + funcBaseName + funcCode + 
"\nusing " + funcBaseName +
 
  322                          "_ret_t = typename ROOT::TypeTraits::CallableTraits<decltype(" + funcBaseName +
 
  327   exprMap.insert({funcCode, funcFullName});
 
  334static std::string RetTypeOfFunc(
const std::string &funcName)
 
  336   const auto dt = 
gROOT->GetType((funcName + 
"_ret_t").c_str());
 
  338   const auto type = dt->GetFullTypeName();
 
  343ThrowJitBuildActionHelperTypeError(
const std::string &actionTypeNameBase, 
const std::type_info &helperArgType)
 
  347   std::string actionHelperTypeName = 
cname;
 
  350      actionHelperTypeName = helperArgType.name();
 
  352   std::string exceptionText =
 
  353      "RDataFrame::Jit: cannot just-in-time compile a \"" + actionTypeNameBase + 
"\" action using helper type \"" +
 
  354      actionHelperTypeName +
 
  355      "\". This typically happens in a custom `Fill` or `Book` invocation where the types of the input columns have " 
  356      "not been specified as template parameters and the ROOT interpreter has no knowledge of this type of action " 
  357      "helper. Please add template parameters for the types of the input columns to avoid jitting this action (i.e. " 
  358      "`df.Fill<float>(..., {\"x\"})`, where `float` is the type of `x`) or declare the action helper type to the " 
  359      "interpreter, e.g. via gInterpreter->Declare.";
 
  361   throw std::runtime_error(exceptionText);
 
  376   std::copy_if(columnNames.begin(), columnNames.end(), std::back_inserter(columnListWithoutSizeColumns),
 
  377                [&](
const std::string &
name) {
 
  378                   if (name[0] == 
'#') {
 
  379                     filteredColumns.emplace_back(name);
 
  386   if (!filteredColumns.empty()) {
 
  387      std::string msg = 
"Column name(s) {";
 
  388      for (
auto &
c : filteredColumns)
 
  390      msg[msg.size() - 2] = 
'}';
 
  391      msg += 
"will be ignored. Please go through a valid Alias to " + action + 
" an array size column";
 
  392      throw std::runtime_error(msg);
 
  395   return columnListWithoutSizeColumns;
 
  398std::string 
ResolveAlias(
const std::string &col, 
const std::map<std::string, std::string> &aliasMap)
 
  400   const auto it = aliasMap.find(col);
 
  401   if (it != aliasMap.end())
 
  405   if (col.size() > 1 && col[0] == 
'#')
 
  406      return "R_rdf_sizeof_" + col.substr(1);
 
  417   const char firstChar = var[0];
 
  420   auto isALetter = [](
char c) { 
return (
c >= 
'A' && 
c <= 
'Z') || (
c >= 
'a' && 
c <= 
'z'); };
 
  421   const bool isValidFirstChar = firstChar == 
'_' || isALetter(firstChar);
 
  422   if (!isValidFirstChar)
 
  426   auto isANumber = [](
char c) { 
return c >= 
'0' && 
c <= 
'9'; };
 
  427   auto isValidTok = [&isALetter, &isANumber](
char c) { 
return c == 
'_' || isALetter(
c) || isANumber(
c); };
 
  428   for (
const char c : var)
 
  433      const auto objName = where == 
"Define" ? 
"column" : 
"variation";
 
  434      const auto error = 
"RDataFrame::" + where + 
": cannot define " + objName + 
" \"" + std::string(var) +
 
  435                         "\". Not a valid C++ variable name.";
 
  436      throw std::runtime_error(error);
 
  440std::string DemangleTypeIdName(
const std::type_info &typeInfo)
 
  444   std::string tname(tn);
 
  // Work on a std::string copy of the user-provided pattern; remember its original length.
  452   const auto theRegexSize = columnNameRegexp.size();
 
  453   std::string theRegex(columnNameRegexp);
 
  455   const auto isEmptyRegex = 0 == theRegexSize;
 
  // Give a non-empty pattern a leading '^' unless it already has one.
  457   if (theRegexSize > 0 && theRegex[0] != 
'^')
 
  458      theRegex = 
"^" + theRegex;
 
  // Give a non-empty pattern a trailing '$' unless it already has one. The check looks at the
  // current last character: theRegexSize is the length *before* '^' may have been prepended
  // above, so indexing with theRegexSize - 1 would inspect the second-to-last character and
  // append a redundant '$' to patterns that already end with it.
  459   if (theRegexSize > 0 && theRegex.back() != 
'$')
 
  460      theRegex = theRegex + 
"$";
 
  467   for (
auto &&colName : colNames) {
 
  469         selectedColumns.emplace_back(colName);
 
  473   if (selectedColumns.empty()) {
 
  // Build the error message: it starts with the caller name, followed by the reason.
  474      std::string 
text(callerName);
 
  // Both reasons begin with ": " and are meant to follow the caller name, so they are
  // appended to `text`; plain assignment would discard the caller name set just above.
  475      if (columnNameRegexp.empty()) {
 
  476         text += 
": there is no column available to match.";
 
  478         text += 
": regex \"" + std::string(columnNameRegexp) + 
"\" did not match any column.";
 
  480      throw std::runtime_error(
text);
 
  482   return selectedColumns;
 
  489   const std::string definedCol(definedColView); 
 
  492   if (colRegister.
IsAlias(definedCol))
 
  493      error = 
"An alias with that name, pointing to column \"" + colRegister.
ResolveAlias(definedCol) +
 
  494              "\", already exists in this branch of the computation graph.";
 
  496      error = 
"A column with that name has already been Define'd. Use Redefine to force redefinition.";
 
  500   else if (std::find(treeColumns.begin(), treeColumns.end(), definedCol) != treeColumns.end())
 
  502         "A branch with that name is already present in the input TTree/TChain. Use Redefine to force redefinition.";
 
  503   else if (std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedCol) != dataSourceColumns.end())
 
  505         "A column with that name is already present in the input data source. Use Redefine to force redefinition.";
 
  507   if (!error.empty()) {
 
  508      error = 
"RDataFrame::" + where + 
": cannot define column \"" + definedCol + 
"\". " + error;
 
  509      throw std::runtime_error(error);
 
  517   const std::string definedCol(definedColView); 
 
  520   if (colRegister.
IsAlias(definedCol)) {
 
  521      error = 
"An alias with that name, pointing to column \"" + colRegister.
ResolveAlias(definedCol) +
 
  522              "\", already exists. Aliases cannot be Redefined or Varied.";
 
  530      const bool isABranch = std::find(treeColumns.begin(), treeColumns.end(), definedCol) != treeColumns.end();
 
  531      const bool isADSColumn =
 
  532         std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedCol) != dataSourceColumns.end();
 
  534      if (!isAlreadyDefined && !isABranch && !isADSColumn)
 
  535         error = 
"No column with that name was found in the dataset. Use Define to create a new column.";
 
  538   if (!error.empty()) {
 
  539      error = 
"RDataFrame::" + where + 
": cannot redefine or vary column \"" + definedCol + 
"\". " + error;
 
  540      throw std::runtime_error(error);
 
  547   const std::string definedCol(definedColView);
 
  549   if (!variationDeps.empty()) {
 
  550      const std::string error =
 
  551         "RDataFrame::" + where + 
": cannot redefine column \"" + definedCol +
 
  552         "\". The column depends on one or more systematic variations and re-defining varied columns is not supported.";
 
  553      throw std::runtime_error(error);
 
  559   if (nTemplateParams != nColumnNames) {
 
  560      std::string err_msg = 
"The number of template parameters specified is ";
 
  561      err_msg += std::to_string(nTemplateParams);
 
  562      err_msg += 
" while ";
 
  563      err_msg += std::to_string(nColumnNames);
 
  564      err_msg += 
" columns have been specified.";
 
  565      throw std::runtime_error(err_msg);
 
  575      if (defaultNames.size() < nRequiredNames)
 
  576         throw std::runtime_error(
 
  577            std::to_string(nRequiredNames) + 
" column name" + (nRequiredNames == 1 ? 
" is" : 
"s are") +
 
  578            " required but none were provided and the default list has size " + std::to_string(defaultNames.size()));
 
  580      return ColumnNames_t(defaultNames.begin(), defaultNames.begin() + nRequiredNames);
 
  583      if (names.size() != nRequiredNames) {
 
  584         auto msg = std::to_string(nRequiredNames) + 
" column name" + (nRequiredNames == 1 ? 
" is" : 
"s are") +
 
  585                    " required but " + std::to_string(names.size()) + (names.size() == 1 ? 
" was" : 
" were") +
 
  587         for (
const auto &
name : names)
 
  588            msg += 
" \"" + 
name + 
"\",";
 
  590         throw std::runtime_error(msg);
 
  600   for (
auto &column : requiredCols) {
 
  601      const auto isBranch = std::find(datasetColumns.begin(), datasetColumns.end(), column) != datasetColumns.end();
 
  606      const auto isDataSourceColumn =
 
  607         std::find(dataSourceColumns.begin(), dataSourceColumns.end(), column) != dataSourceColumns.end();
 
  608      if (isDataSourceColumn)
 
  610      unknownColumns.emplace_back(column);
 
  612   return unknownColumns;
 
  615std::vector<std::string> 
GetFilterNames(
const std::shared_ptr<RLoopManager> &loopManager)
 
  617   return loopManager->GetFiltersNames();
 
  623   std::string_view dirName = 
"";
 
  624   std::string_view treeName = fullTreeName;
 
  625   const auto lastSlash = fullTreeName.rfind(
'/');
 
  626   if (std::string_view::npos != lastSlash) {
 
  627      dirName = treeName.substr(0, lastSlash);
 
  628      treeName = treeName.substr(lastSlash + 1, treeName.size());
 
  630   return {std::string(treeName), std::string(dirName)};
 
  637   s << std::hex << std::showbase << reinterpret_cast<size_t>(addr);
 
  642std::shared_ptr<RDFDetail::RJittedFilter>
 
  643BookFilterJit(std::shared_ptr<RDFDetail::RNodeBase> *prevNodeOnHeap, std::string_view 
name, std::string_view expression,
 
  648   const auto parsedExpr = ParseRDFExpression(expression, branches, colRegister, dsColumns);
 
  649   const auto exprVarTypes =
 
  651   const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
 
  652   const auto type = RetTypeOfFunc(funcName);
 
  // NOTE(review): this statement only constructs a std::runtime_error temporary; there is no
  // `throw` keyword, so no exception is raised here. The other error paths visible in this
  // listing all use `throw`. The condition guarding this statement is not visible in this
  // excerpt; confirm against the full source whether a `throw` is missing, and whether adding
  // it would start rejecting filter expressions that currently work -- TODO confirm.
  654      std::runtime_error(
"Filter: the following expression does not evaluate to bool:\n" + std::string(expression));
 
  661   const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(
 
  662      (*prevNodeOnHeap)->GetLoopManagerUnchecked(), 
name,
 
  667   std::stringstream filterInvocation;
 
  668   filterInvocation << 
"ROOT::Internal::RDF::JitFilterHelper(" << funcName << 
", new const char*[" 
  669                    << parsedExpr.fUsedCols.size() << 
"]{";
 
  670   for (
const auto &col : parsedExpr.fUsedCols)
 
  671      filterInvocation << 
"\"" << col << 
"\", ";
 
  672   if (!parsedExpr.fUsedCols.empty())
 
  673      filterInvocation.seekp(-2, filterInvocation.cur); 
 
  678   filterInvocation << 
"}, " << parsedExpr.fUsedCols.size() << 
", \"" << 
name << 
"\", " 
  679                    << 
"reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedFilter>*>(" 
  681                    << 
"reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" << prevNodeAddr << 
")," 
  682                    << 
"reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesOnHeapAddr << 
")" 
  685   auto lm = jittedFilter->GetLoopManagerUnchecked();
 
  686   lm->ToJitExec(filterInvocation.str());
 
  695                                             std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
 
  700   const auto parsedExpr = ParseRDFExpression(expression, branches, colRegister, dsColumns);
 
  701   const auto exprVarTypes =
 
  703   const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
 
  704   const auto type = RetTypeOfFunc(funcName);
 
  708   auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(
name, 
type, lm, colRegister, parsedExpr.fUsedCols);
 
  710   std::stringstream defineInvocation;
 
  711   defineInvocation << 
"ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>(" << funcName
 
  712                    << 
", new const char*[" << parsedExpr.fUsedCols.size() << 
"]{";
 
  713   for (
const auto &col : parsedExpr.fUsedCols) {
 
  714      defineInvocation << 
"\"" << col << 
"\", ";
 
  716   if (!parsedExpr.fUsedCols.empty())
 
  717      defineInvocation.seekp(-2, defineInvocation.cur); 
 
  722   defineInvocation << 
"}, " << parsedExpr.fUsedCols.size() << 
", \"" << 
name 
  723                    << 
"\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << 
PrettyPrintAddr(&lm)
 
  724                    << 
"), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>(" 
  726                    << 
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
 
  727                    << 
"), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" 
  737                                                      std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
 
  739   const auto funcName = DeclareFunction(std::string(expression), {
"rdfslot_", 
"rdfsampleinfo_"},
 
  740                                         {
"unsigned int", 
"const ROOT::RDF::RSampleInfo"});
 
  741   const auto retType = RetTypeOfFunc(funcName);
 
  745   auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(
name, retType, lm, colRegister, 
ColumnNames_t{});
 
  747   std::stringstream defineInvocation;
 
  748   defineInvocation << 
"ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>(" 
  749                    << funcName << 
", nullptr, 0, ";
 
  754   defineInvocation << 
"\"" << 
name << 
"\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << 
PrettyPrintAddr(&lm)
 
  755                    << 
"), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>(" 
  757                    << 
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
 
  758                    << 
"), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" 
  766std::shared_ptr<RJittedVariation>
 
  768                 const std::vector<std::string> &variationTags, std::string_view expression, 
RLoopManager &lm,
 
  770                 std::shared_ptr<RNodeBase> *upcastNodeOnHeap, 
bool isSingleColumn)
 
  775   const auto parsedExpr = ParseRDFExpression(expression, branches, colRegister, dsColumns);
 
  776   const auto exprVarTypes =
 
  778   const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
 
  779   const auto type = RetTypeOfFunc(funcName);
 
  781   if (
type.rfind(
"ROOT::VecOps::RVec", 0) != 0)
 
  782      throw std::runtime_error(
 
  783         "Jitted Vary expressions must return an RVec object. The following expression returns a " + 
type +
 
  784         " instead:\n" + parsedExpr.fExpr);
 
  788   auto jittedVariation = std::make_shared<RJittedVariation>(colNames, variationName, variationTags, 
type, colRegister,
 
  789                                                             lm, parsedExpr.fUsedCols);
 
  797   std::stringstream varyInvocation;
 
  798   varyInvocation << 
"ROOT::Internal::RDF::JitVariationHelper<" << (isSingleColumn ? 
"true" : 
"false") << 
">(" 
  799                  << funcName << 
", new const char*[" << parsedExpr.fUsedCols.size() << 
"]{";
 
  800   for (
const auto &col : parsedExpr.fUsedCols) {
 
  801      varyInvocation << 
"\"" << col << 
"\", ";
 
  803   if (!parsedExpr.fUsedCols.empty())
 
  804      varyInvocation.seekp(-2, varyInvocation.cur); 
 
  805   varyInvocation << 
"}, " << parsedExpr.fUsedCols.size();
 
  806   varyInvocation << 
", new const char*[" << colNames.size() << 
"]{";
 
  807   for (
const auto &col : colNames) {
 
  808      varyInvocation << 
"\"" << col << 
"\", ";
 
  810   varyInvocation.seekp(-2, varyInvocation.cur); 
 
  811   varyInvocation << 
"}, " << colNames.size() << 
", new const char*[" << variationTags.size() << 
"]{";
 
  812   for (
const auto &tag : variationTags) {
 
  813      varyInvocation << 
"\"" << tag << 
"\", ";
 
  815   varyInvocation.seekp(-2, varyInvocation.cur); 
 
  816   varyInvocation << 
"}, " << variationTags.size() << 
", \"" << variationName
 
  817                  << 
"\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << 
PrettyPrintAddr(&lm)
 
  818                  << 
"), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedVariation>*>(" 
  820                  << 
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << colRegisterAddr
 
  821                  << 
"), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" 
  825   return jittedVariation;
 
  831                           const std::type_info &helperArgType, 
const std::type_info &at, 
void *helperArgOnHeap,
 
  833                           std::weak_ptr<RJittedAction> *jittedActionOnHeap)
 
  837   if (!actionTypeClass) {
 
  838      std::string exceptionText = 
"An error occurred while inferring the action type of the operation.";
 
  839      throw std::runtime_error(exceptionText);
 
  841   const std::string actionTypeName = actionTypeClass->GetName();
 
  842   const std::string actionTypeNameBase = actionTypeName.substr(actionTypeName.rfind(
':') + 1);
 
  846   if (helperArgTypeName.empty()) {
 
  847      ThrowJitBuildActionHelperTypeError(actionTypeNameBase, helperArgType);
 
  855   std::stringstream createAction_str;
 
  856   createAction_str << 
"ROOT::Internal::RDF::CallBuildAction<" << actionTypeName;
 
  857   const auto columnTypeNames =
 
  859   for (
auto &colType : columnTypeNames)
 
  860      createAction_str << 
", " << colType;
 
  863   createAction_str << 
">(reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" 
  864                    << 
PrettyPrintAddr(prevNode) << 
"), new const char*[" << cols.size() << 
"]{";
 
  865   for (
auto i = 0u; i < cols.size(); ++i) {
 
  867         createAction_str << 
", ";
 
  868      createAction_str << 
'"' << cols[i] << 
'"';
 
  870   createAction_str << 
"}, " << cols.size() << 
", " << nSlots << 
", reinterpret_cast<shared_ptr<" << helperArgTypeName
 
  872                    << 
"), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedAction>*>(" 
  874                    << 
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr << 
"));";
 
  875   return createAction_str.str();
 
  880   for (
const auto &s : strings) {
 
  887std::shared_ptr<RNodeBase> 
UpcastNode(std::shared_ptr<RNodeBase> ptr)
 
  902   for (
auto &col : selectedColumns) {
 
  910   if (!unknownColumns.empty()) {
 
  911      using namespace std::string_literals;
 
  912      std::string errMsg = 
"Unknown column"s + (unknownColumns.size() > 1 ? 
"s: " : 
": ");
 
  913      for (
auto &unknownColumn : unknownColumns)
 
  914         errMsg += 
'"' + unknownColumn + 
"\", ";
 
  915      errMsg.resize(errMsg.size() - 2); 
 
  916      throw std::runtime_error(std::move(errMsg));
 
  919   return selectedColumns;
 
  926   auto toCheckedArgType = [&](
const std::string &
c) {
 
  929      if (colType.rfind(
"CLING_UNKNOWN_TYPE", 0) == 0) { 
 
  931            "The type of custom column \"" + 
c + 
"\" (" + colType.substr(19) +
 
  932            ") is not known to the interpreter, but a just-in-time-compiled " + context +
 
  933            " call requires this column. Make sure to create and load ROOT dictionaries for this column's class.";
 
  934         throw std::runtime_error(msg);
 
  938   std::vector<std::string> colTypes;
 
  939   colTypes.reserve(colNames.size());
 
  940   std::transform(colNames.begin(), colNames.end(), std::back_inserter(colTypes), toCheckedArgType);
 
  949   const auto nColumns = requestedCols.size();
 
  950   std::vector<bool> mustBeDefined(nColumns, 
false);
 
  951   for (
auto i = 0u; i < nColumns; ++i)
 
  952      mustBeDefined[i] = std::find(definedCols.begin(), definedCols.end(), requestedCols[i]) == definedCols.end();
 
  953   return mustBeDefined;
 
  958   std::unordered_set<std::string> uniqueCols;
 
  959   for (
auto &col : cols) {
 
  960      if (!uniqueCols.insert(col).second) {
 
  961         const auto msg = 
"Error: column \"" + col +
 
  962                          "\" was passed to Snapshot twice. This is not supported: only one of the columns would be " 
  963                          "readable with RDataFrame.";
 
  964         throw std::logic_error(msg);
 
  981std::pair<std::vector<std::string>, std::vector<std::string>>
 
  983                std::vector<std::string> &&colsWithAliases)
 
  986      return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
 
  988   assert(colsWithoutAliases.size() == colsWithAliases.size());
 
  990   auto nCols = colsWithoutAliases.size();
 
  992   for (std::size_t i = 0u; i < nCols; ++i) {
 
  993      const auto &colName = colsWithoutAliases[i];
 
  997      auto *
b = 
tree->GetBranch(colName.c_str());
 
  999         b = 
tree->FindBranch(colName.c_str());
 
 1000      assert(
b != 
nullptr);
 
 1001      auto *leaves = 
b->GetListOfLeaves();
 
 1005      TLeaf *countLeaf = 
static_cast<TLeaf *
>(leaves->At(0))->GetLeafCount();
 
 1010      colsWithoutAliases.insert(colsWithoutAliases.begin() + i, countLeaf->
GetName());
 
 1011      colsWithAliases.insert(colsWithAliases.begin() + i, countLeaf->
GetName());
 
 1016   return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
 
 1021   std::set<std::string> uniqueCols;
 
 1023      std::remove_if(columnNames.begin(), columnNames.end(),
 
 1024                     [&uniqueCols](
const std::string &colName) { return !uniqueCols.insert(colName).second; }),
 
size_t size(const MatrixT &matrix)
Retrieve the size of a square matrix.
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char cname
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
 
Option_t Option_t TPoint TPoint const char text
 
R__EXTERN TVirtualMutex * gROOTMutex
 
#define R__LOCKGUARD(mutex)
 
The head node of a RDF computation graph.
 
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
 
void ToJitExec(const std::string &) const
 
void Run(bool jit=true)
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
 
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the RDataFrame.
 
A binder for user-defined columns, variations and aliases.
 
bool IsDefineOrAlias(std::string_view name) const
Check if the provided name is tracked in the names list.
 
RDFDetail::RDefineBase * GetDefine(const std::string &colName) const
Return the RDefine for the requested column name, or nullptr.
 
bool IsAlias(const std::string &name) const
Return true if the given column name is an existing alias.
 
std::string ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
 
std::vector< std::string > GetVariationDeps(const std::string &column) const
Get the names of all variations that directly or indirectly affect a given column.
 
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
 
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
 
RDFDetail::RLoopManager * fLoopManager
The RLoopManager at the root of this computation graph. Never null.
 
The public interface to the RDataFrame federation of classes.
 
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
 
A TLeaf describes individual elements of a TBranch. See TBranch structure in TTree.
 
const char * GetName() const override
Returns name of object.
 
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
 
A TTree represents a columnar dataset.
 
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
 
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
 
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
 
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
 
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
 
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
 
void CheckValidCppVarName(std::string_view var, const std::string &where)
 
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
 
void RemoveDuplicates(ColumnNames_t &columnNames)
 
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
 
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
 
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
 
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
 
std::string ResolveAlias(const std::string &col, const std::map< std::string, std::string > &aliasMap)
 
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
 
std::string PrettyPrintAddr(const void *const addr)
 
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
 
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
 
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
 
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
 
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
 
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
 
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting with '#' filtered out.
 
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
 
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap, bool isSingleColumn)
Book the jitting of a Vary call.
 
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
 
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
 
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(const std::vector< std::string > &branches, TTree *tree, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
 
std::vector< bool > FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedCols)
Return a bitset each element of which indicates whether the corresponding element in selectedColumns ...
 
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
 
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
 
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
 
void TriggerRun(ROOT::RDF::RNode &node)
Trigger the execution of an RDataFrame computation graph.
 
std::vector< std::string > ColumnNames_t
 
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
 
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle the type id name in a portable way.