24 namespace Experimental {
// Remainder of the parameter list and body of the routine that scans `expression` for the
// names of known columns (the reference listing gives its name as FindUsedColumnNames).
// Four name sources are checked in turn: customColumns, branches, dsColumns and the keys of
// aliasMap. Every name that occurs in the expression as a whole identifier is appended to
// usedBranches. Several lines of the original listing (closing braces, the final return) are
// not present in this excerpt.
37 const ColumnNames_t &customColumns,
const ColumnNames_t &dsColumns,
38 const std::map<std::string, std::string> &aliasMap)
// Surround the expression with one space on each side, so that a name sitting at the very
// beginning or end still has a non-identifier character next to it for the pattern below.
41 const std::string paddedExpr =
" " + std::string(expression) +
" ";
// NOTE(review): this variable is handed by pointer to TRegexp::Index on every call below;
// presumably Index stores the match length in it, so it may no longer equal the string size
// after the first successful match -- TODO confirm.
42 int paddedExprLen = paddedExpr.size();
// Matches any single character that cannot be part of an identifier; it is placed on both
// sides of each candidate name to build the search pattern.
43 static const std::string regexBit(
"[^a-zA-Z0-9_]");
// Names found in the expression, in the order in which they are discovered.
45 std::vector<std::string> usedBranches;
// Pass 1: custom columns. Index() != -1 means the pattern was found in the padded expression.
// NOTE(review): the name is concatenated into the pattern unescaped, so a regex metacharacter
// inside a column name would be interpreted by TRegexp. The same applies to the passes below.
48 for (
auto &brName : customColumns) {
49 std::string bNameRegexContent = regexBit + brName + regexBit;
50 TRegexp bNameRegex(bNameRegexContent.c_str());
51 if (-1 != bNameRegex.Index(paddedExpr.c_str(), &paddedExprLen)) {
52 usedBranches.emplace_back(brName);
// Pass 2: names in `branches`. Unlike passes 3 and 4, no duplicate check is visible here.
57 for (
auto &brName : branches) {
58 std::string bNameRegexContent = regexBit + brName + regexBit;
59 TRegexp bNameRegex(bNameRegexContent.c_str());
60 if (-1 != bNameRegex.Index(paddedExpr.c_str(), &paddedExprLen)) {
61 usedBranches.emplace_back(brName);
// Pass 3: data-source columns, appended only when not already present in usedBranches.
66 for (
auto &col : dsColumns) {
67 std::string bNameRegexContent = regexBit + col + regexBit;
68 TRegexp bNameRegex(bNameRegexContent.c_str());
69 if (-1 != bNameRegex.Index(paddedExpr.c_str(), &paddedExprLen)) {
71 if (std::find(usedBranches.begin(), usedBranches.end(), col) == usedBranches.end())
72 usedBranches.emplace_back(col);
// Pass 4: aliases. The key of each aliasMap entry is searched for, and the alias itself (not
// the mapped value) is what gets appended, again skipping names already recorded.
77 for (
auto &alias_colName : aliasMap) {
78 auto &alias = alias_colName.first;
79 std::string bNameRegexContent = regexBit + alias + regexBit;
80 TRegexp bNameRegex(bNameRegexContent.c_str());
81 if (-1 != bNameRegex.Index(paddedExpr.c_str(), &paddedExprLen)) {
83 if (std::find(usedBranches.begin(), usedBranches.end(), alias) == usedBranches.end())
84 usedBranches.emplace_back(alias);
// Remainder of the parameter list and body of the routine that turns a textual expression
// into C++ source for a Define/Filter call and passes that source to the interpreter (the
// reference listing gives its name as JitTransformation). Many lines of the original listing
// are not present in this excerpt; comments below describe only what is visible.
95 const std::map<std::string, std::string> &aliasMap,
const ColumnNames_t &
branches,
96 const std::vector<std::string> &customColumns,
97 const std::map<std::string, TmpBranchBasePtr_t> &tmpBookedBranches, TTree *
tree,
// Column names come from the data source when one is supplied, otherwise the list is empty.
100 const auto &dsColumns = ds ? ds->
GetColumnNames() : ColumnNames_t{};
// Names of all known columns/aliases that appear in the expression.
101 auto usedBranches =
FindUsedColumnNames(expression, branches, customColumns, dsColumns, aliasMap);
102 auto exprNeedsVariables = !usedBranches.empty();
// Type name of each used column, stored in the same order as usedBranches.
107 std::vector<std::string> usedBranchesTypes;
// Function-local counter: each call opens a namespace with a different numeric suffix.
108 static unsigned int iNs = 0U;
// Text of a declaration: a namespace wrapping a lambda in which one variable per used
// column is declared, followed by the expression itself.
109 std::stringstream dummyDecl;
110 dummyDecl <<
"namespace __tdf_" << std::to_string(iNs++) <<
"{ auto __tdf_lambda = []() {";
113 auto aliasMapEnd = aliasMap.end();
114 if (exprNeedsVariables) {
115 for (
auto &brName : usedBranches) {
// Resolve an alias to the column name it maps to; non-alias names are used unchanged.
118 auto aliasMapIt = aliasMap.find(brName);
119 auto &realBrName = aliasMapEnd == aliasMapIt ? brName : aliasMapIt->second;
// Booked custom column of that name, or nullptr when there is none.
121 auto tmpBrIt = tmpBookedBranches.find(realBrName);
122 auto tmpBr = tmpBrIt == tmpBookedBranches.end() ? nullptr : tmpBrIt->second.get();
// brTypeName is computed on a listing line that is not part of this excerpt. The variable
// is declared under the name as written in the expression (brName), not the resolved name.
124 dummyDecl << brTypeName <<
" " << brName <<
";\n";
125 usedBranchesTypes.emplace_back(brTypeName);
// Look for the keyword `return` delimited by non-identifier characters.
// NOTE(review): the expression is not padded here, so a `return` at the very start of the
// expression has no preceding character and would not match this pattern -- confirm whether
// that is intended.
129 TRegexp re(
"[^a-zA-Z0-9_]return[^a-zA-Z0-9_]");
130 int exprSize = expression.size();
131 bool hasReturnStmt = re.
Index(std::string(expression), &exprSize) != -1;
// Two alternative endings of the declaration: the expression as written, or the expression
// prefixed with "return ". The condition selecting between them is on lines not shown here
// (presumably hasReturnStmt).
136 dummyDecl << expression <<
"\n;};}";
138 dummyDecl <<
"return " << expression <<
"\n;};}";
// Error raised for an expression that cannot be interpreted; the check that triggers it is
// on lines not shown here.
143 "Cannot interpret the following expression:\n" + std::string(expression) +
"\n\nMake sure it is valid C++.";
144 throw std::runtime_error(msg);
// Text of the lambda that will be passed to the Define/Filter call.
148 std::stringstream ss;
// One `Type& name, ` parameter per used column.
150 for (
unsigned int i = 0; i < usedBranchesTypes.size(); ++i) {
// Types whose name starts with the TArrayBranch template are treated specially; the action
// taken is on a line not shown here.
155 if (usedBranchesTypes[i].find(
"ROOT::Experimental::TDF::TArrayBranch<") == 0u)
159 ss << usedBranchesTypes[i] <<
"& " << usedBranches[i] <<
", ";
// Move the write position back by two characters so that the next write overwrites the
// trailing ", " left by the loop.
161 if (!usedBranchesTypes.empty())
162 ss.seekp(-2, ss.cur);
// Two alternative lambda bodies, mirroring the choice made for dummyDecl above.
165 ss <<
"){\n" << expression <<
"\n}";
167 ss <<
"){return " << expression <<
"\n;}";
169 auto filterLambda = ss.str();
// Type of the interface object that the generated call expression is wrapped in.
174 const auto targetTypeName =
"ROOT::Experimental::TDF::TInterface<" + std::string(returnTypeName) +
">";
// Generated text: cast the address held in thisPtr to interfaceTypeName* and invoke
// methodName on it.
178 ss << targetTypeName <<
"(((" << interfaceTypeName <<
"*)" << thisPtr <<
")->" << methodName <<
"(";
// For "Define" the quoted name is the first argument of the generated call.
179 if (methodName ==
"Define") {
180 ss <<
"\"" << name <<
"\", ";
// Then the lambda, followed by a brace-enclosed list of the quoted, alias-resolved names.
182 ss << filterLambda <<
", {";
183 for (
auto brName : usedBranches) {
185 auto aliasMapIt = aliasMap.find(brName);
186 auto &realBrName = aliasMapEnd == aliasMapIt ? brName : aliasMapIt->second;
187 ss <<
"\"" << realBrName <<
"\", ";
// Step back over the trailing ", " when at least one name was written.
189 if (exprNeedsVariables)
190 ss.seekp(-2, ss.cur);
// For "Filter" the quoted name is appended after the column list.
193 if (methodName ==
"Filter") {
194 ss <<
", \"" << name <<
"\"";
// Hand the generated text to the interpreter. Either a non-kNoError code or a zero return
// value is reported as a std::runtime_error.
200 auto retVal =
gInterpreter->Calc(ss.str().c_str(), &interpErrCode);
201 if (TInterpreter::EErrorCode::kNoError != interpErrCode || !retVal) {
202 std::string msg =
"Cannot interpret the invocation to " + std::string(methodName) +
": ";
204 if (TInterpreter::EErrorCode::kNoError != interpErrCode) {
205 msg +=
"\nInterpreter error code is " + std::to_string(interpErrCode) +
".";
207 throw std::runtime_error(msg);
// Builds and returns, as a string, the text of a call to
// ROOT::Internal::TDF::CallBuildAndBook<ActionType, ColumnTypes...>(...).
// Pointer arguments (prevNode, rOnHeap, actionPtrPtr) are streamed into the text and wrapped
// in reinterpret_cast expressions. Throws std::runtime_error when a column type, the result
// type or the action type cannot be determined. Several lines of the original listing are
// not present in this excerpt.
214 std::string
JitBuildAndBook(
const ColumnNames_t &bl,
const std::string &prevNodeTypename,
void *prevNode,
215 const std::type_info &art,
const std::type_info &at,
const void *rOnHeap, TTree *
tree,
216 const unsigned int nSlots,
const std::map<std::string, TmpBranchBasePtr_t> &customColumns,
217 TDataSource *ds,
const std::shared_ptr<TActionBase *> *
const actionPtrPtr)
219 auto nBranches = bl.size();
// For every requested column: pointer to the booked custom column of the same name, or
// nullptr when the name is not a key of customColumns.
222 std::vector<TCustomColumnBase *> tmpBranchPtrs(nBranches,
nullptr);
223 for (
auto i = 0u; i < nBranches; ++i) {
224 auto tmpBranchIt = customColumns.find(bl[i]);
225 if (tmpBranchIt != customColumns.end())
226 tmpBranchPtrs[i] = tmpBranchIt->second.get();
// Type name of every requested column. columnTypeName is computed on a listing line that is
// not part of this excerpt; an empty result is reported as an error.
230 std::vector<std::string> columnTypeNames(nBranches);
231 for (
auto i = 0u; i < nBranches; ++i) {
233 if (columnTypeName.empty()) {
234 std::string exceptionText =
"The type of column ";
235 exceptionText += bl[i];
236 exceptionText +=
" could not be guessed. Please specify one.";
237 throw std::runtime_error(exceptionText.c_str());
239 columnTypeNames[i] = columnTypeName;
// actionResultTypeClass is obtained on lines not shown here (presumably from `art`); a null
// value is an error, otherwise its name is used in the generated text.
244 if (!actionResultTypeClass) {
245 std::string exceptionText =
"An error occurred while inferring the result type of an operation.";
246 throw std::runtime_error(exceptionText.c_str());
248 const auto actionResultTypeName = actionResultTypeClass->GetName();
// Same handling for actionTypeClass (presumably obtained from `at`).
252 if (!actionTypeClass) {
253 std::string exceptionText =
"An error occurred while inferring the action type of the operation.";
254 throw std::runtime_error(exceptionText.c_str());
256 const auto actionTypeName = actionTypeClass->GetName();
// Assemble the call text. Template arguments: the action type followed by each column type.
262 std::stringstream createAction_str;
263 createAction_str <<
"ROOT::Internal::TDF::CallBuildAndBook" 264 <<
"<" << actionTypeName;
265 for (
auto &colType : columnTypeNames)
266 createAction_str <<
", " << colType;
// First call argument: the previous node, dereferenced after casting its address to
// prevNodeTypename*. Then the opening brace of the column-name list.
267 createAction_str <<
">(*reinterpret_cast<" << prevNodeTypename <<
"*>(" << prevNode <<
"), {";
// Quoted column names; the ", " separator is emitted under a condition on a line not shown.
268 for (
auto i = 0u; i < bl.size(); ++i) {
270 createAction_str <<
", ";
271 createAction_str <<
'"' << bl[i] <<
'"';
// Remaining arguments: the slot count, rOnHeap cast to a pointer to the result type, and
// actionPtrPtr cast to a pointer to shared_ptr<TActionBase*>.
273 createAction_str <<
"}, " << nSlots <<
", reinterpret_cast<" << actionResultTypeName <<
"*>(" << rOnHeap <<
")" 274 <<
", reinterpret_cast<const std::shared_ptr<ROOT::Internal::TDF::TActionBase*>*>(" << actionPtrPtr
276 return createAction_str.str();
// Visits every element of `strings` by const reference. The enclosing signature and the loop
// body are not part of this excerpt; presumably this belongs to AtLeastOneEmptyString, which
// the reference listing declares with a `strings` parameter -- TODO confirm.
281 for (
const auto &
s : strings) {
// UpcastNode overload for TFilterBase: takes a shared_ptr<TFilterBase> by const value and
// returns the same shared_ptr type. The body is not part of this excerpt.
288 std::shared_ptr<TFilterBase>
UpcastNode(
const std::shared_ptr<TFilterBase> ptr)
// UpcastNode overload for TCustomColumnBase: takes a shared_ptr<TCustomColumnBase> by const
// value and returns the same shared_ptr type. The body is not part of this excerpt.
293 std::shared_ptr<TCustomColumnBase>
UpcastNode(
const std::shared_ptr<TCustomColumnBase> ptr)
// UpcastNode overload for TRangeBase: takes a shared_ptr<TRangeBase> by const value and
// returns the same shared_ptr type. The body is not part of this excerpt.
298 std::shared_ptr<TRangeBase>
UpcastNode(
const std::shared_ptr<TRangeBase> ptr)
// UpcastNode overload for TLoopManager: takes a shared_ptr<TLoopManager> by const value and
// returns the same shared_ptr type. The body is not part of this excerpt.
303 std::shared_ptr<TLoopManager>
UpcastNode(
const std::shared_ptr<TLoopManager> ptr)
// Remainder of the parameter list and body of the routine that selects the column names to
// use, rejects unknown ones and replaces aliases with the names they map to (the reference
// listing gives its name as GetValidatedColumnNames). Several lines of the original listing
// are not present in this excerpt.
313 const ColumnNames_t &validCustomColumns,
TDataSource *ds)
// defaultColumns is defined on a line not shown here.
316 auto selectedColumns =
SelectColumns(nColumns, columns, defaultColumns);
// unknownColumns is computed on a line not shown here. When it is non-empty, a
// std::runtime_error is thrown whose message starts with "Unknown column", using the plural
// form "s: " when more than one name is listed.
320 if (!unknownColumns.empty()) {
322 std::stringstream unknowns;
323 std::string delim = unknownColumns.size() > 1 ?
"s: " :
": ";
// delim is written before every name; presumably it is changed to a plain separator on the
// loop line that is not shown here -- TODO confirm.
324 for (
auto &unknownColumn : unknownColumns) {
325 unknowns << delim << unknownColumn;
328 throw std::runtime_error(
"Unknown column" + unknowns.str());
// Replace, in place, every selected name that is a key of aliasMap with its mapped value.
333 auto aliasMapEnd = aliasMap.end();
335 for (
auto idx :
ROOT::TSeqU(selectedColumns.size())) {
336 const auto &colName = selectedColumns[idx];
337 const auto aliasColumnNameIt = aliasMap.find(colName);
338 if (aliasMapEnd != aliasColumnNameIt) {
339 selectedColumns[idx] = aliasColumnNameIt->second;
343 return selectedColumns;
// Body of the routine that reports which requested columns are not yet defined (the
// reference listing gives its name as FindUndefinedDSColumns; the signature line is not part
// of this excerpt). The result has one flag per element of requestedCols.
351 const auto nColumns = requestedCols.size();
// All flags start out false.
352 std::vector<bool> mustBeDefined(nColumns,
false);
// A flag becomes true when the corresponding requested name is not found in definedCols.
353 for (
auto i = 0u; i < nColumns; ++i)
354 mustBeDefined[i] = std::find(definedCols.begin(), definedCols.end(), requestedCols[i]) == definedCols.end();
355 return mustBeDefined;
std::vector< bool > FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedDSCols)
Return a vector of flags, one per element of requestedCols: a flag is true when the corresponding column name is not found among the already-defined columns, and false otherwise.
std::shared_ptr< TFilterBase > UpcastNode(const std::shared_ptr< TFilterBase > ptr)
basic_string_view< char > string_view
Namespace for new ROOT classes and functions.
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, TTree *tree, const ColumnNames_t &definedCols, const ColumnNames_t &dataSourceColumns)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
Regular expression class.
const std::map< std::string, std::string > & GetAliasMap() const
std::string JitBuildAndBook(const ColumnNames_t &bl, const std::string &prevNodeTypename, void *prevNode, const std::type_info &art, const std::type_info &at, const void *r, TTree *tree, const unsigned int nSlots, const std::map< std::string, TmpBranchBasePtr_t > &customColumns, TDataSource *ds, const std::shared_ptr< TActionBase *> *const actionPtrPtr)
ColumnNames_t GetValidatedColumnNames(TLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &validCustomColumns, TDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
TDataSource defines an API that TDataFrame can use to read arbitrary data formats.
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the TDataFrame...
Ssiz_t Index(const TString &str, Ssiz_t *len, Ssiz_t start=0) const
Find the first occurrence of the regexp in string and return the position, or -1 if there is no match...
A pseudo container class which is a generator of indices.
static constexpr double s
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, TCustomColumnBase *tmpBranch, TDataSource *ds)
Return a string containing the type of the given branch.
std::vector< std::string > FindUsedColumnNames(std::string_view expression, const ColumnNames_t &branches, const ColumnNames_t &customColumns, const ColumnNames_t &dsColumns, const std::map< std::string, std::string > &aliasMap)
Long_t JitTransformation(void *thisPtr, std::string_view methodName, std::string_view interfaceTypeName, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const std::vector< std::string > &customColumns, const std::map< std::string, TmpBranchBasePtr_t > &tmpBookedBranches, TTree *tree, std::string_view returnTypeName, TDataSource *ds)
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
The public interface to the TDataFrame federation of classes.