40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Woverloaded-virtual"
42#pragma GCC diagnostic ignored "-Wshadow"
46#pragma GCC diagnostic pop
60#include <unordered_map>
61#include <unordered_set>
74struct ParsedExpression {
79 ColumnNames_t fUsedCols;
81 ColumnNames_t fVarNames;
92 const auto msg =
"Failed to tokenize expression:\n" +
expr +
"\n\nMake sure it is valid C++.";
93 throw std::runtime_error(
msg);
96 std::unordered_set<std::string>
usedCols;
101 const auto kSymbol = lexertk::token::e_symbol;
102 for (
auto i = 0
u; i <
nTokens; ++i) {
157 dot.Substitute(out,
"\\.",
"g");
158 return std::string(std::move(out));
169 replacer.Substitute(out, col.data(),
"g");
182 "(^|\\W)#(?!(ifdef|ifndef|if|else|elif|endif|pragma|define|undef|include|line))([a-zA-Z_][a-zA-Z0-9_]*)");
197 for (
auto i = 0
u; i <
varNames.size(); ++i)
198 varNames[i] =
"var" + std::to_string(i);
206 [](
const std::string &
a,
const std::string &
b) { return a.size() > b.size(); });
269 std::stringstream
ss;
271 for (
auto i = 0
u; i < vars.size(); ++i) {
285 ss.seekp(-2,
ss.cur);
338 const auto funcName =
exprIt->second;
347 "_ret_t = typename ROOT::TypeTraits::CallableTraits<decltype(" +
funcBaseName +
361 const auto dt =
gROOT->GetType((funcName +
"_ret_t").c_str());
363 const auto type =
dt->GetFullTypeName();
378 "RDataFrame::Jit: cannot just-in-time compile a \"" +
actionTypeNameBase +
"\" action using helper type \"" +
380 "\". This typically happens in a custom `Fill` or `Book` invocation where the types of the input columns have "
381 "not been specified as template parameters and the ROOT interpreter has no knowledge of this type of action "
382 "helper. Please add template parameters for the types of the input columns to avoid jitting this action (i.e. "
383 "`df.Fill<float>(..., {\"x\"})`, where `float` is the type of `x`) or declare the action helper type to the "
384 "interpreter, e.g. via gInterpreter->Declare.";
402 [&](
const std::string &
name) {
403 if (name[0] ==
'#') {
404 filteredColumns.emplace_back(name);
412 std::string
msg =
"Column name(s) {";
415 msg[
msg.size() - 2] =
'}';
416 msg +=
"will be ignored. Please go through a valid Alias to " +
action +
" an array size column";
417 throw std::runtime_error(
msg);
432 auto isALetter = [](
char c) {
return (
c >=
'A' &&
c <=
'Z') || (
c >=
'a' &&
c <=
'z'); };
438 auto isANumber = [](
char c) {
return c >=
'0' &&
c <=
'9'; };
440 for (
const char c : var)
445 const auto objName =
where ==
"Define" ?
"column" :
"variation";
446 const auto error =
"RDataFrame::" +
where +
": cannot define " +
objName +
" \"" + std::string(var) +
447 "\". Not a valid C++ variable name.";
448 throw std::runtime_error(error);
452std::string DemangleTypeIdName(
const std::type_info &
typeInfo)
488 text =
": there is no column available to match.";
492 throw std::runtime_error(
text);
505 "\", already exists in this branch of the computation graph.";
507 error =
"A column with that name has already been Define'd. Use Redefine to force redefinition.";
510 "A column with that name is already present in the input data source. Use Redefine to force redefinition.";
512 if (!error.empty()) {
513 error =
"RDataFrame::" +
where +
": cannot define column \"" + std::string(
definedColView) +
"\". " + error;
514 throw std::runtime_error(error);
526 "\", already exists. Aliases cannot be Redefined or Varied.";
535 error =
"No column with that name was found in the dataset. Use Define to create a new column.";
538 if (!error.empty()) {
539 if (
where ==
"DefaultValueFor")
540 error =
"RDataFrame::" +
where +
": cannot provide default values for column \"" +
543 error =
"RDataFrame::" +
where +
": cannot redefine or vary column \"" + std::string(
definedColView) +
"\". " +
545 throw std::runtime_error(error);
555 if (
where ==
"Redefine") {
556 const std::string error =
"RDataFrame::" +
where +
": cannot redefine column \"" +
definedCol +
557 "\". The column depends on one or more systematic variations and re-defining varied "
558 "columns is not supported.";
559 throw std::runtime_error(error);
560 }
else if (
where ==
"DefaultValueFor") {
561 const std::string error =
"RDataFrame::" +
where +
": cannot provide a default value for column \"" +
563 "\". The column depends on one or more systematic variations and it should not be "
564 "possible to have missing values in varied columns.";
565 throw std::runtime_error(error);
567 const std::string error =
568 "RDataFrame::" +
where +
": this operation cannot work with columns that depend on systematic variations.";
569 throw std::runtime_error(error);
577 std::string
err_msg =
"The number of template parameters specified is ";
581 err_msg +=
" columns have been specified.";
582 throw std::runtime_error(
err_msg);
593 throw std::runtime_error(
595 " required but none were provided and the default list has size " + std::to_string(
defaultNames.size()));
602 " required but " + std::to_string(names.size()) + (names.size() == 1 ?
" was" :
" were") +
604 for (
const auto &
name : names)
607 throw std::runtime_error(
msg);
640 if (std::string_view::npos !=
lastSlash) {
656std::shared_ptr<RDFDetail::RJittedFilter>
657BookFilterJit(std::shared_ptr<RDFDetail::RNodeBase> prevNode, std::string_view
name, std::string_view expression,
668 throw std::runtime_error(
"Filter: the following expression does not evaluate to bool:\n" +
669 std::string(expression));
671 auto *
lm = prevNode->GetLoopManagerUnchecked();
672 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(
678 <<
"ROOT::Internal::RDF::RColumnRegister &colRegister, "
679 <<
"ROOT::Detail::RDF::RLoopManager &lm, "
680 <<
"void *jittedFilter, "
681 <<
"std::shared_ptr<void> *) {\n";
682 filterInvocation <<
" ROOT::Internal::RDF::JitFilterHelper(" << funcName <<
", "
686 <<
"reinterpret_cast<ROOT::Detail::RDF::RJittedFilter*>(jittedFilter)"
715 <<
"ROOT::Internal::RDF::RColumnRegister &colRegister, "
716 <<
"ROOT::Detail::RDF::RLoopManager &lm, "
717 <<
"void *jittedDefine, "
718 <<
"std::shared_ptr<void> *) {\n";
719 defineInvocation <<
" ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>("
724 <<
"reinterpret_cast<ROOT::Detail::RDF::RJittedDefine *>(jittedDefine)"
736 const auto funcName =
DeclareFunction(std::string(expression), {
"rdfslot_",
"rdfsampleinfo_"},
737 {
"unsigned int",
"const ROOT::RDF::RSampleInfo"});
748 <<
"ROOT::Internal::RDF::RColumnRegister &colRegister, "
749 <<
"ROOT::Detail::RDF::RLoopManager &lm, "
750 <<
"void *jittedDefine, "
751 <<
"std::shared_ptr<void> *) {\n";
752 defineInvocation <<
" ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>("
757 <<
"reinterpret_cast<ROOT::Detail::RDF::RJittedDefine *>(jittedDefine)"
765std::shared_ptr<RJittedVariation>
776 const auto funcName =
780 if (
type.rfind(
"ROOT::VecOps::RVec", 0) != 0) {
781 throw std::runtime_error(
782 "Jitted Vary expressions must return an RVec object. The following expression return type is '" +
type +
797 varyInvocation <<
"(const std::vector<std::string> &inputColNames, "
798 <<
"ROOT::Internal::RDF::RColumnRegister &colRegister, "
799 <<
"ROOT::Detail::RDF::RLoopManager &lm, "
800 <<
"void *jittedVariation, "
801 <<
"std::shared_ptr<void> *helperArg) {\n";
803 <<
" auto *variedColNamesAndTags = reinterpret_cast<std::shared_ptr<std::pair<std::vector<std::string>, "
804 "std::vector<std::string>>> *>(helperArg);"
805 <<
" ROOT::Internal::RDF::JitVariationHelper<" << (
isSingleColumn ?
"true" :
"false") <<
">(" << funcName
810 <<
"reinterpret_cast<ROOT::Internal::RDF::RJittedVariation *>(jittedVariation), "
811 <<
"(*variedColNamesAndTags)->first, "
812 <<
"(*variedColNamesAndTags)->second"
814 lm.RegisterJitHelperCall(
817 std::make_shared<std::pair<std::vector<std::string>, std::vector<std::string>>>(
colNames,
variationTags));
830 std::string
exceptionText =
"An error occurred while inferring the action type of the operation.";
846 <<
"ROOT::Internal::RDF::RColumnRegister &colRegister, "
847 <<
"ROOT::Detail::RDF::RLoopManager &lm, "
848 <<
"void *jittedAction, "
849 <<
"std::shared_ptr<void> *helperArg) {\n";
858 <<
"reinterpret_cast<ROOT::Internal::RDF::RJittedAction *>(jittedAction), " <<
nSlots <<
", "
866 for (
const auto &s :
strings) {
873std::shared_ptr<RNodeBase>
UpcastNode(std::shared_ptr<RNodeBase> ptr)
901 const auto &
colsToIgnore =
lm.GetSuppressErrorsForMissingBranches();
906 std::string
errMsg = std::string(
"Unknown column") + (
unknownColumns.size() > 1 ?
"s: " :
": ");
910 throw std::runtime_error(
errMsg);
924 if (
colType.rfind(
"CLING_UNKNOWN_TYPE", 0) == 0) {
926 "The type of custom column \"" +
c +
"\" (" +
colType.substr(19) +
927 ") is not known to the interpreter, but a just-in-time-compiled " + context +
928 " call requires this column. Make sure to create and load ROOT dictionaries for this column's class.";
929 throw std::runtime_error(
msg);
942 for (
auto &col :
cols) {
944 const auto msg =
"Error: column \"" + col +
945 "\" was passed to Snapshot twice. This is not supported: only one of the columns would be "
946 "readable with RDataFrame.";
947 throw std::logic_error(
msg);
975 "The RNTuple-specific %s option in RSnapshotOptions has been set, but the output format is "
976 "set to TTree, so this option won't have any effect. Use the other options available in "
977 "RSnapshotOptions to "
978 "configure the output TTree. Alternatively, change fOutputFormat to snapshot to RNTuple instead.",
993 "The TTree-specific %s option in RSnapshotOptions has been set, but the output format is set to RNTuple, "
994 "so this option won't have any effect. Use the fNTupleWriteOptions option available in RSnapshotOptions to "
995 "configure the output RNTuple. Alternatively, change fOutputFormat to snapshot to TTree instead.",
1003std::pair<std::vector<std::string>, std::vector<std::string>>
1009 tree =
treeDS->GetTree();
1017 for (std::size_t i = 0
u; i <
nCols; ++i) {
1020 auto *
b = tree->GetBranch(
colName.c_str());
1022 b = tree->FindBranch(
colName.c_str());
1027 auto *
leaves =
b->GetListOfLeaves();
1050 [&
uniqueCols](
const std::string &
colName) { return !uniqueCols.insert(colName).second; }),
1059 [](
const std::string &
colName) { return colName.find(
'.') == std::string::npos; });
1063 if (colName.find(
'.') == std::string::npos)
1065 const auto parentFieldName = colName.substr(0, colName.find_first_of(
'.'));
1066 return std::find(parentFields.cbegin(), parentFields.cend(), parentFieldName) !=
1087 lm.GetSuppressErrorsForMissingBranches().find(
colName) ==
lm.GetSuppressErrorsForMissingBranches().
end())
1090 const auto nSlots =
lm.GetNSlots();
1091 std::vector<std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>>
colReaders;
1104 const std::vector<const std::type_info *> &
colTypeIDs,
1108 assert(
nCols ==
colTypeIDs.size() &&
"Must provide exactly one column type for each column to create");
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char cname
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char text
R__EXTERN TVirtualMutex * gROOTMutex
#define R__LOCKGUARD(mutex)
The head node of a RDF computation graph.
A binder for user-defined columns, variations and aliases.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
const_iterator begin() const
const_iterator end() const
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
A TLeaf describes individual elements of a TBranch. See TBranch structure in TTree.
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
A TTree represents a columnar dataset.
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister)
Book the jitting of a DefinePerSample call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
void RemoveDuplicates(ColumnNames_t &columnNames)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
void CheckSnapshotOptionsFormatCompatibility(const ROOT::RDF::RSnapshotOptions &opts)
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > prevNode, std::string_view name, std::string_view expression, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
std::string JitBuildAction(const ColumnNames_t &cols, const std::type_info &helperArgType, const std::type_info &at, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, const bool vector2RVec)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(ROOT::RDF::RDataSource &ds, unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *treeReader)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(ROOT::RDF::RDataSource *ds, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array branches added.
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
void RemoveRNTupleSubfields(ColumnNames_t &columnNames)
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names and return that list with entries starting with '#' filtered out.
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2RVec)
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister)
Book the jitting of a Define call.
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn, const std::string &varyColType)
Book the jitting of a Vary call.
std::vector< std::string > ColumnNames_t
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle the type id name in a portable way.
BVH_ALWAYS_INLINE T dot(const Vec< T, N > &a, const Vec< T, N > &b)
A collection of options to steer the creation of the dataset on disk through Snapshot().