39#pragma GCC diagnostic push 
   40#pragma GCC diagnostic ignored "-Woverloaded-virtual" 
   41#pragma GCC diagnostic ignored "-Wshadow" 
   45#pragma GCC diagnostic pop 
   60#include <unordered_map> 
   61#include <unordered_set> 
   84struct ParsedExpression {
 
   89   ColumnNames_t fUsedCols;
 
   91   ColumnNames_t fVarNames;
 
   95std::pair<ColumnNames_t, ColumnNames_t>
 
  102      const auto msg = 
"Failed to tokenize expression:\n" + 
expr + 
"\n\nMake sure it is valid C++.";
 
  103      throw std::runtime_error(
msg);
 
  106   std::unordered_set<std::string> 
usedCols;
 
  111   const auto kSymbol = lexertk::token::e_symbol;
 
  112   for (
auto i = 0
u; i < 
nTokens; ++i) {
 
  162   dot.Substitute(out, 
"\\.", 
"g");
 
  163   return std::string(std::move(out));
 
  174      replacer.Substitute(out, col.data(), 
"g");
 
  188      "(^|\\W)#(?!(ifdef|ifndef|if|else|elif|endif|pragma|define|undef|include|line))([a-zA-Z_][a-zA-Z0-9_]*)");
 
  203   for (
auto i = 0
u; i < 
varNames.size(); ++i)
 
  204      varNames[i] = 
"var" + std::to_string(i);
 
  212             [](
const std::string &
a, 
const std::string &
b) { return a.size() > b.size(); });
 
  275   std::stringstream 
ss;
 
  277   for (
auto i = 0
u; i < vars.size(); ++i) {
 
  291      ss.seekp(-2, 
ss.cur);
 
  313      const auto funcName = 
exprIt->second;
 
  322                          "_ret_t = typename ROOT::TypeTraits::CallableTraits<decltype(" + 
funcBaseName +
 
  336   const auto dt = 
gROOT->GetType((funcName + 
"_ret_t").c_str());
 
  338   const auto type = 
dt->GetFullTypeName();
 
  353      "RDataFrame::Jit: cannot just-in-time compile a \"" + 
actionTypeNameBase + 
"\" action using helper type \"" +
 
  355      "\". This typically happens in a custom `Fill` or `Book` invocation where the types of the input columns have " 
  356      "not been specified as template parameters and the ROOT interpreter has no knowledge of this type of action " 
  357      "helper. Please add template parameters for the types of the input columns to avoid jitting this action (i.e. " 
  358      "`df.Fill<float>(..., {\"x\"})`, where `float` is the type of `x`) or declare the action helper type to the " 
  359      "interpreter, e.g. via gInterpreter->Declare.";
 
  377                [&](
const std::string &
name) {
 
  378                   if (name[0] == 
'#') {
 
  379                     filteredColumns.emplace_back(name);
 
  387      std::string 
msg = 
"Column name(s) {";
 
  390      msg[
msg.size() - 2] = 
'}';
 
  391      msg += 
"will be ignored. Please go through a valid Alias to " + 
action + 
" an array size column";
 
  392      throw std::runtime_error(
msg);
 
 
  405   if (col.size() > 1 && col[0] == 
'#')
 
  406      return "R_rdf_sizeof_" + col.substr(1);
 
 
  420   auto isALetter = [](
char c) { 
return (
c >= 
'A' && 
c <= 
'Z') || (
c >= 
'a' && 
c <= 
'z'); };
 
  426   auto isANumber = [](
char c) { 
return c >= 
'0' && 
c <= 
'9'; };
 
  428   for (
const char c : var)
 
  433      const auto objName = 
where == 
"Define" ? 
"column" : 
"variation";
 
  434      const auto error = 
"RDataFrame::" + 
where + 
": cannot define " + 
objName + 
" \"" + std::string(var) +
 
  435                         "\". Not a valid C++ variable name.";
 
  436      throw std::runtime_error(error);
 
 
  440std::string DemangleTypeIdName(
const std::type_info &
typeInfo)
 
 
  476         text = 
": there is no column available to match.";
 
  480      throw std::runtime_error(
text);
 
 
  493              "\", already exists in this branch of the computation graph.";
 
  495      error = 
"A column with that name has already been Define'd. Use Redefine to force redefinition.";
 
  501         "A branch with that name is already present in the input TTree/TChain. Use Redefine to force redefinition.";
 
  504         "A column with that name is already present in the input data source. Use Redefine to force redefinition.";
 
  506   if (!error.empty()) {
 
  507      error = 
"RDataFrame::" + 
where + 
": cannot define column \"" + std::string(
definedColView) + 
"\". " + error;
 
  508      throw std::runtime_error(error);
 
 
  520              "\", already exists. Aliases cannot be Redefined or Varied.";
 
  533         error = 
"No column with that name was found in the dataset. Use Define to create a new column.";
 
  536   if (!error.empty()) {
 
  538         "RDataFrame::" + 
where + 
": cannot redefine or vary column \"" + std::string(
definedColView) + 
"\". " + error;
 
  539      throw std::runtime_error(error);
 
 
  549      const std::string error =
 
  551         "\". The column depends on one or more systematic variations and re-defining varied columns is not supported.";
 
  552      throw std::runtime_error(error);
 
 
  559      std::string 
err_msg = 
"The number of template parameters specified is ";
 
  563      err_msg += 
" columns have been specified.";
 
  564      throw std::runtime_error(
err_msg);
 
 
  575         throw std::runtime_error(
 
  577            " required but none were provided and the default list has size " + std::to_string(
defaultNames.size()));
 
  584                    " required but " + std::to_string(names.size()) + (names.size() == 1 ? 
" was" : 
" were") +
 
  586         for (
const auto &
name : names)
 
  589         throw std::runtime_error(
msg);
 
 
  625   if (std::string_view::npos != 
lastSlash) {
 
 
  641std::shared_ptr<RDFDetail::RJittedFilter>
 
  653      std::runtime_error(
"Filter: the following expression does not evaluate to bool:\n" + std::string(expression));
 
  660   const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(
 
  661      (*prevNodeOnHeap)->GetLoopManagerUnchecked(), 
name,
 
  667   filterInvocation << 
"ROOT::Internal::RDF::JitFilterHelper(" << funcName << 
", new const char*[" 
  678                    << 
"reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedFilter>*>(" 
  680                    << 
"reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" << 
prevNodeAddr << 
")," 
  681                    << 
"reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << 
definesOnHeapAddr << 
")" 
 
  696   auto *
const tree = 
lm.GetTree();
 
  710   defineInvocation << 
"ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>(" << funcName
 
  711                    << 
", new const char*[" << 
parsedExpr.fUsedCols.size() << 
"]{";
 
  712   for (
const auto &col : 
parsedExpr.fUsedCols) {
 
  722                    << 
"\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << 
PrettyPrintAddr(&
lm)
 
  723                    << 
"), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>(" 
  725                    << 
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << 
definesAddr 
  726                    << 
"), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" 
 
  738   const auto funcName = 
DeclareFunction(std::string(expression), {
"rdfslot_", 
"rdfsampleinfo_"},
 
  739                                         {
"unsigned int", 
"const ROOT::RDF::RSampleInfo"});
 
  747   defineInvocation << 
"ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>(" 
  748                    << funcName << 
", nullptr, 0, ";
 
  754                    << 
"), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>(" 
  756                    << 
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << 
definesAddr 
  757                    << 
"), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" 
 
  765std::shared_ptr<RJittedVariation>
 
  771   auto *
const tree = 
lm.GetTree();
 
  780   if (
type.rfind(
"ROOT::VecOps::RVec", 0) != 0) {
 
  784      throw std::runtime_error(
 
  785         "Jitted Vary expressions must return an RVec object. The following expression returns a " + 
type +
 
  802                  << funcName << 
", new const char*[" << 
parsedExpr.fUsedCols.size() << 
"]{";
 
  803   for (
const auto &col : 
parsedExpr.fUsedCols) {
 
  820                  << 
"\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << 
PrettyPrintAddr(&
lm)
 
  821                  << 
"), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedVariation>*>(" 
  823                  << 
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << 
colRegisterAddr 
  824                  << 
"), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" 
 
  841      std::string 
exceptionText = 
"An error occurred while inferring the action type of the operation.";
 
  866   createAction_str << 
">(reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" 
  868   for (
auto i = 0
u; i < 
cols.size(); ++i) {
 
  875                    << 
"), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedAction>*>(" 
  877                    << 
"), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << 
definesAddr << 
"));";
 
 
  883   for (
const auto &s : 
strings) {
 
 
  890std::shared_ptr<RNodeBase> 
UpcastNode(std::shared_ptr<RNodeBase> ptr)
 
 
  914      using namespace std::string_literals;
 
  919      throw std::runtime_error(
errMsg);
 
 
  932      if (
colType.rfind(
"CLING_UNKNOWN_TYPE", 0) == 0) { 
 
  934            "The type of custom column \"" + 
c + 
"\" (" + 
colType.substr(19) +
 
  935            ") is not known to the interpreter, but a just-in-time-compiled " + context +
 
  936            " call requires this column. Make sure to create and load ROOT dictionaries for this column's class.";
 
  937         throw std::runtime_error(
msg);
 
 
  962   for (
auto &col : 
cols) {
 
  964         const auto msg = 
"Error: column \"" + col +
 
  965                          "\" was passed to Snapshot twice. This is not supported: only one of the columns would be " 
  966                          "readable with RDataFrame.";
 
  967         throw std::logic_error(
msg);
 
 
  974std::pair<std::vector<std::string>, std::vector<std::string>>
 
  985   for (std::size_t i = 0
u; i < 
nCols; ++i) {
 
  990      auto *
b = tree->GetBranch(
colName.c_str());
 
  992         b = tree->FindBranch(
colName.c_str());
 
  994      auto *
leaves = 
b->GetListOfLeaves();
 
 
 1017                     [&
uniqueCols](
const std::string &
colName) { return !uniqueCols.insert(colName).second; }),
 
 
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
 
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char cname
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
 
Option_t Option_t TPoint TPoint const char text
 
R__EXTERN TVirtualMutex * gROOTMutex
 
#define R__LOCKGUARD(mutex)
 
The head node of a RDF computation graph.
 
A binder for user-defined columns, variations and aliases.
 
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
 
const_iterator begin() const
 
const_iterator end() const
 
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
 
A TLeaf describes individual elements of a TBranch. See TBranch structure in TTree.
 
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
 
A TTree represents a columnar dataset.
 
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
 
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
 
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
 
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
 
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
 
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
 
void CheckValidCppVarName(std::string_view var, const std::string &where)
 
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
 
void RemoveDuplicates(ColumnNames_t &columnNames)
 
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
 
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
 
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
 
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
 
std::string ResolveAlias(const std::string &col, const std::map< std::string, std::string > &aliasMap)
 
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
 
std::string PrettyPrintAddr(const void *const addr)
 
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
 
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
 
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
 
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
 
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
 
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
 
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting with '#' filtered out.
 
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
 
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap, bool isSingleColumn)
Book the jitting of a Vary call.
 
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
 
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
 
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(const std::vector< std::string > &branches, TTree *tree, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
 
std::vector< bool > FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedCols)
Return a bitset each element of which indicates whether the corresponding element in selectedColumns ...
 
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
 
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
 
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
 
std::vector< std::string > ColumnNames_t
 
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
 
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.