11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
48#include <initializer_list>
58#include <unordered_set>
86template <
typename Proxied,
typename DataSource>
89using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
103template <
typename Proxied,
typename DataSource =
void>
112 template <
typename T,
typename W>
131 const auto treeName =
tree->GetName();
132 const auto isTChain =
dynamic_cast<TChain *
>(
tree) ?
true :
false;
133 const auto treeType = isTChain ?
"TChain" :
"TTree";
134 const auto isInMemory = !isTChain && !
tree->GetCurrentFile() ? true :
false;
136 const auto hasFriends = friendInfo.fFriendNames.empty() ? false :
true;
137 std::stringstream ss;
138 ss <<
"Dataframe from " << treeType <<
" " << treeName;
140 ss <<
" (in-memory)";
143 const auto numFiles = files.size();
145 ss <<
" in file " << files[0];
148 for (
auto i = 0u; i < numFiles; i++) {
149 ss <<
" " << files[i];
150 if (i < numFiles - 1)
156 const auto numFriends = friendInfo.fFriendNames.size();
157 if (numFriends == 1) {
158 ss <<
"\nwith friend\n";
160 ss <<
"\nwith friends\n";
162 for (
auto i = 0u; i < numFriends; i++) {
163 const auto nameAlias = friendInfo.fFriendNames[i];
164 const auto files = friendInfo.fFriendFileNames[i];
165 const auto numFiles = files.size();
166 const auto subnames = friendInfo.fFriendChainSubNames[i];
167 ss <<
" " << nameAlias.first;
168 if (nameAlias.first != nameAlias.second)
169 ss <<
" (" << nameAlias.second <<
")";
172 ss <<
" " << files[0];
177 for (
auto j = 0u; j < numFiles; j++) {
178 ss <<
" " << subnames[j] <<
" " << files[j];
179 if (j < numFiles - 1)
183 if (i < numFriends - 1)
192 return "Dataframe from datasource " + datasourceLabel;
198 return "Empty dataframe filling 1 row";
200 return "Empty dataframe filling " + std::to_string(
n) +
" rows";
225 template <typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value,
int>>
285 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value,
int> = 0>
289 RDFInternal::CheckFilter(
f);
290 using ColTypes_t =
typename TTraits::CallableTraits<F>::arg_types;
291 constexpr auto nColumns = ColTypes_t::list_size;
309 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value,
int> = 0>
325 template <
typename F>
350 using BaseNodeType_t =
typename std::remove_pointer_t<
decltype(upcastNodeOnHeap)>::element_type;
352 const auto jittedFilter =
389 template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value,
int> = 0>
392 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(
name, std::move(expression), columns,
"Define");
418 template <
typename F>
421 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(
name, std::move(expression), columns,
"DefineSlot");
448 template <
typename F>
451 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(
name, std::move(expression), columns,
469 constexpr auto where =
"Define";
498 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value,
int> = 0>
501 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(
name, std::move(expression), columns,
"Redefine");
517 template <
typename F>
520 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(
name, std::move(expression), columns,
"RedefineSlot");
536 template <
typename F>
539 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(
name, std::move(expression), columns,
540 "RedefineSlotEntry");
559 constexpr auto where =
"Redefine";
603 template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
611 if (retTypeName.empty()) {
615 retTypeName =
"CLING_UNKNOWN_TYPE_" + demangledType;
619 std::make_shared<RDFDetail::RDefinePerSample<F>>(
name, retTypeName, std::move(expression), *
fLoopManager);
622 newCols.AddColumn(std::move(newColumn));
709 template <
typename F>
711 std::size_t nVariations, std::string_view variationName =
"")
713 R__ASSERT(nVariations > 0 &&
"Must have at least one variation.");
715 std::vector<std::string> variationTags;
716 variationTags.reserve(nVariations);
717 for (std::size_t i = 0u; i < nVariations; ++i)
718 variationTags.emplace_back(std::to_string(i));
720 const std::string theVariationName{variationName.empty() ? colName : variationName};
722 return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName);
729 template <
typename F>
731 const std::vector<std::string> &variationTags, std::string_view variationName =
"")
733 std::vector<std::string> colNames{{std::string(colName)}};
734 const std::string theVariationName{variationName.empty() ? colName : variationName};
736 return Vary(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags, theVariationName);
753 template <
typename F>
756 const std::vector<std::string> &variationTags, std::string_view variationName)
759 R__ASSERT(variationTags.size() > 0 &&
"Must have at least one variation.");
760 R__ASSERT(colNames.size() > 0 &&
"Must have at least one varied column.");
761 R__ASSERT(!variationName.empty() &&
"Must provide a variation name.");
763 for (
auto &colName : colNames) {
770 using F_t = std::decay_t<F>;
771 using ColTypes_t =
typename TTraits::CallableTraits<F_t>::arg_types;
772 constexpr auto nColumns = ColTypes_t::list_size;
776 using RetType =
typename TTraits::CallableTraits<F_t>::ret_type;
779 if (colNames.size() > 1) {
782 throw std::runtime_error(
"This Vary call is varying multiple columns simultaneously but the expression "
783 "does not return an RVec of RVecs.");
786 auto allColTypesEqual =
787 std::all_of(colTypes.begin() + 1, colTypes.end(), [&](
const std::string &t) { return t == colTypes[0]; });
788 if (!allColTypesEqual)
789 throw std::runtime_error(
"Cannot simultaneously vary multiple columns of different types.");
791 const auto &innerTypeID =
typeid(RDFInternal::InnerValueType_t<RetType>);
794 for (
auto i = 0u; i < colTypes.size(); ++i) {
795 const auto it = definesMap.find(colNames[i]);
796 const auto &expectedTypeID =
798 if (innerTypeID != expectedTypeID)
799 throw std::runtime_error(
"Varied values for column \"" + colNames[i] +
"\" have a different type (" +
804 const auto &retTypeID =
typeid(
typename RetType::value_type);
805 const auto &colName = colNames[0];
807 const auto it = definesMap.find(colName);
808 const auto &expectedTypeID =
810 if (retTypeID != expectedTypeID)
811 throw std::runtime_error(
"Varied values for column \"" + colName +
"\" have a different type (" +
817 if (retTypeName.empty()) {
821 retTypeName =
"CLING_UNKNOWN_TYPE_" + demangledType;
825 if (colNames.size() > 1) {
826 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
827 if (uniqueCols.size() != colNames.size())
828 throw std::logic_error(
"The same column was passed multiple times the same column twice: ");
832 auto variation = std::make_shared<RDFInternal::RVariation<F_t>>(
837 newCols.AddVariation(variation);
848 template <
typename F>
851 std::size_t nVariations, std::string_view variationName)
853 R__ASSERT(nVariations > 0 &&
"Must have at least one variation.");
855 std::vector<std::string> variationTags;
856 variationTags.reserve(nVariations);
857 for (std::size_t i = 0u; i < nVariations; ++i)
858 variationTags.emplace_back(std::to_string(i));
860 return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName);
883 const std::vector<std::string> &variationTags, std::string_view variationName =
"")
885 std::vector<std::string> colNames{{std::string(colName)}};
886 const std::string theVariationName{variationName.empty() ? colName : variationName};
888 return Vary(std::move(colNames), expression, variationTags, theVariationName);
901 std::string_view variationName =
"")
903 std::vector<std::string> colNames{{std::string(colName)}};
904 const std::string theVariationName{variationName.empty() ? colName : variationName};
906 return Vary(std::move(colNames), expression, nVariations, theVariationName);
927 std::size_t nVariations, std::string_view variationName)
929 std::vector<std::string> variationTags;
930 variationTags.reserve(nVariations);
931 for (std::size_t i = 0u; i < nVariations; ++i)
932 variationTags.emplace_back(std::to_string(i));
934 return Vary(colNames, expression, std::move(variationTags), variationName);
955 const std::vector<std::string> &variationTags, std::string_view variationName)
957 R__ASSERT(variationTags.size() > 0 &&
"Must have at least one variation.");
958 R__ASSERT(colNames.size() > 0 &&
"Must have at least one varied column.");
959 R__ASSERT(!variationName.empty() &&
"Must provide a variation name.");
961 for (
auto &colName : colNames) {
969 if (colNames.size() > 1) {
970 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
971 if (uniqueCols.size() != colNames.size())
972 throw std::logic_error(
"The same column was passed multiple times the same column twice: ");
976 auto jittedVariation =
981 newColRegister.
AddVariation(std::move(jittedVariation));
1009 constexpr auto where =
"Alias";
1017 newCols.AddAlias(alias, validColumnName);
1021 return newInterface;
1083 template <
typename... ColumnTypes>
1088 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
1113 const auto pairOfColumnLists =
1115 std::move(colListNoAliases), std::move(colListNoPoundSizes));
1116 const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first;
1117 const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second;
1120 const auto fullTreeName = treename;
1122 treename = parsedTreePath.fTreeName;
1123 const auto &dirname = parsedTreePath.fDirName;
1125 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(
1126 RDFInternal::SnapshotHelperArgs{std::string(filename), std::string(dirname), std::string(treename),
1127 colListWithAliasesAndSizeBranches, options});
1130 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, colListNoAliasesWithSizeBranches);
1132 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
1133 colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, colListNoAliasesWithSizeBranches.size());
1154 std::string_view columnNameRegexp =
"",
1163 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1164 [](
const std::string &
name) { return name.size() < 13 || name.substr(0, 13) !=
"R_rdf_sizeof_"; });
1166 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
1167 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1168 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1169 columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
1176 return Snapshot(treename, filename, selectedColumns, options);
1194 std::initializer_list<std::string> columnList,
1198 return Snapshot(treename, filename, selectedColumns, options);
1236 template <
typename... ColumnTypes>
1239 auto staticSeq = std::make_index_sequence<
sizeof...(ColumnTypes)>();
1240 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
1253 if (columnList.empty()) {
1254 auto nEntries = *this->
Count();
1259 std::stringstream cacheCall;
1266 cacheCall <<
"*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
1268 <<
") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1273 const auto validColumnNames =
1277 for (
const auto &colType : colTypes)
1278 cacheCall << colType <<
", ";
1279 if (!columnListWithoutSizeColumns.empty())
1280 cacheCall.seekp(-2, cacheCall.cur);
1281 cacheCall <<
">(*reinterpret_cast<std::vector<std::string>*>("
1306 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1307 [](
const std::string &
name) { return name.size() < 13 || name.substr(0, 13) !=
"R_rdf_sizeof_"; });
1309 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
1310 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1311 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1312 columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
1314 return Cache(selectedColumns);
1326 return Cache(selectedColumns);
1350 if (stride == 0 || (end != 0 && end < begin))
1351 throw std::runtime_error(
"Range: stride must be strictly greater than 0 and end must be greater than begin.");
1355 auto rangePtr = std::make_shared<Range_t>(begin, end, stride,
fProxiedPtr);
1387 template <
typename F>
1390 using arg_types =
typename TTraits::CallableTraits<
decltype(
f)>::arg_types_nodecay;
1391 using ret_type =
typename TTraits::CallableTraits<
decltype(
f)>::ret_type;
1392 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(
f, arg_types()), columns);
1417 template <
typename F>
1421 constexpr auto nColumns = ColTypes_t::list_size;
1426 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1464 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1468 std::is_default_constructible<T>::value,
1469 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
1470 return Reduce(std::move(
f), columnName, T());
1487 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1509 auto cSPtr = std::make_shared<ULong64_t>(0);
1510 using Helper_t = RDFInternal::CountHelper;
1514 return MakeResultPtr(cSPtr, *
fLoopManager, std::move(action));
1537 template <
typename T,
typename COLL = std::vector<T>>
1545 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
1547 auto valuesPtr = std::make_shared<COLL>();
1552 return MakeResultPtr(valuesPtr, *
fLoopManager, std::move(action));
1580 template <
typename V = RDFDetail::RInferredType>
1587 std::shared_ptr<::TH1D>
h(
nullptr);
1589 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
1590 h = model.GetHistogram();
1591 h->SetDirectory(
nullptr);
1594 if (
h->GetXaxis()->GetXmax() ==
h->GetXaxis()->GetXmin())
1595 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*
h);
1596 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns,
h,
h);
1616 template <
typename V = RDFDetail::RInferredType>
1619 const auto h_name = std::string(vName);
1620 const auto h_title = h_name +
";" + h_name +
";count";
1621 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1642 template <
typename V = RDFDetail::RInferredType,
typename W = RDFDetail::RInferredType>
1645 const std::vector<std::string_view> columnViews = {vName, wName};
1649 std::shared_ptr<::TH1D>
h(
nullptr);
1651 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
1654 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns,
h,
h);
1676 template <
typename V = RDFDetail::RInferredType,
typename W = RDFDetail::RInferredType>
1680 std::string str_vName{vName};
1681 std::string str_wName{wName};
1682 const auto h_name = str_vName +
"_weighted_" + str_wName;
1683 const auto h_title = str_vName +
", weights: " + str_wName +
";" + str_vName +
";count * " + str_wName;
1684 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1696 template <
typename V,
typename W>
1699 return Histo1D<V, W>(model,
"",
"");
1730 template <
typename V1 = RDFDetail::RInferredType,
typename V2 = RDFDetail::RInferredType>
1733 std::shared_ptr<::TH2D>
h(
nullptr);
1735 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
1738 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*
h)) {
1739 throw std::runtime_error(
"2D histograms with no axes limits are not supported yet.");
1741 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1745 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns,
h,
h);
1774 Histo2D(
const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
1776 std::shared_ptr<::TH2D>
h(
nullptr);
1778 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
1781 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*
h)) {
1782 throw std::runtime_error(
"2D histograms with no axes limits are not supported yet.");
1784 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1788 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns,
h,
h);
1791 template <
typename V1,
typename V2,
typename W>
1794 return Histo2D<V1, V2, W>(model,
"",
"",
"");
1827 std::string_view v3Name =
"")
1829 std::shared_ptr<::TH3D>
h(
nullptr);
1831 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
1834 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*
h)) {
1835 throw std::runtime_error(
"3D histograms with no axes limits are not supported yet.");
1837 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1841 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns,
h,
h);
1876 std::string_view v3Name, std::string_view wName)
1878 std::shared_ptr<::TH3D>
h(
nullptr);
1880 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
1883 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*
h)) {
1884 throw std::runtime_error(
"3D histograms with no axes limits are not supported yet.");
1886 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1890 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns,
h,
h);
1893 template <
typename V1,
typename V2,
typename V3,
typename W>
1896 return Histo3D<V1, V2, V3, W>(model,
"",
"",
"",
"");
1921 template <
typename FirstColumn,
typename... OtherColumns>
1924 std::shared_ptr<::THnD>
h(
nullptr);
1926 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
1929 if (
int(columnList.size()) == (
h->GetNdimensions() + 1)) {
1931 }
else if (
int(columnList.size()) !=
h->GetNdimensions()) {
1932 throw std::runtime_error(
"Wrong number of columns for the specified number of histogram axes.");
1935 return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList,
h,
h);
1957 std::shared_ptr<::THnD>
h(
nullptr);
1959 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
1962 if (
int(columnList.size()) == (
h->GetNdimensions() + 1)) {
1964 }
else if (
int(columnList.size()) !=
h->GetNdimensions()) {
1965 throw std::runtime_error(
"Wrong number of columns for the specified number of histogram axes.");
1968 return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList,
h,
h,
2000 template <
typename V1 = RDFDetail::RInferredType,
typename V2 = RDFDetail::RInferredType>
2003 auto graph = std::make_shared<::TGraph>();
2004 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2012 if (!(validatedColumns[0].empty() && validatedColumns[1].empty())) {
2013 const auto g_name = std::string(v1Name) +
"_vs_" + std::string(v2Name);
2014 const auto g_title = std::string(v1Name) +
" vs " + std::string(v2Name);
2015 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2016 graph->GetXaxis()->SetTitle(std::string(v1Name).c_str());
2017 graph->GetYaxis()->SetTitle(std::string(v2Name).c_str());
2020 return CreateAction<RDFInternal::ActionTags::Graph, V1, V2>(validatedColumns,
graph,
graph);
2046 template <
typename V1 = RDFDetail::RInferredType,
typename V2 = RDFDetail::RInferredType>
2050 std::shared_ptr<::TProfile>
h(
nullptr);
2052 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
2056 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*
h)) {
2057 throw std::runtime_error(
"Profiles with no axes limits are not supported yet.");
2059 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2063 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns,
h,
h);
2095 std::shared_ptr<::TProfile>
h(
nullptr);
2097 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
2101 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*
h)) {
2102 throw std::runtime_error(
"Profile histograms with no axes limits are not supported yet.");
2104 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2108 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns,
h,
h);
2114 template <
typename V1,
typename V2,
typename W>
2117 return Profile1D<V1, V2, W>(model,
"",
"",
"");
2150 std::string_view v2Name =
"", std::string_view v3Name =
"")
2152 std::shared_ptr<::TProfile2D>
h(
nullptr);
2154 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
2158 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*
h)) {
2159 throw std::runtime_error(
"2D profiles with no axes limits are not supported yet.");
2161 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2165 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns,
h,
h);
2198 std::string_view v3Name, std::string_view wName)
2200 std::shared_ptr<::TProfile2D>
h(
nullptr);
2202 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(
kError);
2206 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*
h)) {
2207 throw std::runtime_error(
"2D profiles with no axes limits are not supported yet.");
2209 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2213 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns,
h,
h);
2218 template <
typename V1,
typename V2,
typename V3,
typename W>
2221 return Profile2D<V1, V2, V3, W>(model,
"",
"",
"",
"");
2261 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
2262 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*
h)) {
2263 throw std::runtime_error(
"The absence of axes limits is not supported yet.");
2265 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList,
h,
h,
2284 template <
typename V = RDFDetail::RInferredType>
2288 if (!value.empty()) {
2289 columns.emplace_back(std::string(value));
2292 if (std::is_same<V, RDFDetail::RInferredType>::value) {
2295 return Fill<V>(
TStatistic(), validColumnNames);
2316 template <
typename V = RDFDetail::RInferredType,
typename W = RDFDetail::RInferredType>
2319 ColumnNames_t columns{std::string(value), std::string(weight)};
2320 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2321 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2327 if (vIsInferred && wIsInferred) {
2329 }
else if (vIsInferred != wIsInferred) {
2330 std::string error(
"The ");
2331 error += vIsInferred ?
"value " :
"weight ";
2332 error +=
"column type is explicit, while the ";
2333 error += vIsInferred ?
"weight " :
"value ";
2334 error +=
" is specified to be inferred. This case is not supported: please specify both types or none.";
2335 throw std::runtime_error(error);
2337 return Fill<V, W>(
TStatistic(), validColumnNames);
2362 template <
typename T = RDFDetail::RInferredType>
2366 using RetType_t = RDFDetail::MinReturnType_t<T>;
2367 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
2368 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV);
2392 template <
typename T = RDFDetail::RInferredType>
2396 using RetType_t = RDFDetail::MaxReturnType_t<T>;
2397 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
2398 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV);
2421 template <
typename T = RDFDetail::RInferredType>
2425 auto meanV = std::make_shared<double>(0);
2426 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV);
2449 template <
typename T = RDFDetail::RInferredType>
2453 auto stdDeviationV = std::make_shared<double>(0);
2454 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV);
2480 template <
typename T = RDFDetail::RInferredType>
2482 Sum(std::string_view columnName =
"",
2483 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2486 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
2487 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV);
2517 bool returnEmptyReport =
false;
2524 returnEmptyReport =
true;
2526 auto rep = std::make_shared<RCutFlowReport>();
2527 using Helper_t = RDFInternal::ReportHelper<Proxied>;
2533 return MakeResultPtr(rep, *
fLoopManager, std::move(action));
2552 std::unordered_set<std::string> allColumns;
2554 auto addIfNotInternal = [&allColumns](std::string_view colName) {
2556 allColumns.emplace(colName);
2561 std::for_each(definedColumns.begin(), definedColumns.end(), addIfNotInternal);
2566 allColumns.emplace(bName);
2571 if (s.rfind(
"R_rdf_sizeof", 0) != 0)
2572 allColumns.emplace(s);
2577 std::sort(ret.begin(), ret.end());
2600 const bool convertVector2RVec =
true;
2602 convertVector2RVec);
2639 std::set<std::string> definedColumnNamesSet;
2641 definedColumnNamesSet.insert(
name);
2644 const std::vector<std::string> metadataProperties = {
"Columns in total",
"Columns from defines",
2645 "Event loops run",
"Processing slots"};
2646 const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
2647 std::to_string(definedColumnNamesSet.size()),
2654 const auto columnWidthValues =
2655 std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(),
static_cast<std::size_t
>(5u));
2656 std::stringstream ss;
2657 ss << std::left << std::setw(columnWidthProperties) <<
"Property" << std::setw(columnWidthValues) <<
"Value\n"
2658 << std::setw(columnWidthProperties) <<
"--------" << std::setw(columnWidthValues) <<
"-----\n";
2662 for (
auto i = 0u; i < metadataProperties.size(); i++) {
2663 ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
2664 << std::setw(columnWidthValues) << metadataValues[i] <<
'\n';
2672 ss << std::left << std::setw(columnWidthNames) <<
"Column" << std::setw(columnWidthTypes) <<
"Type"
2674 << std::setw(columnWidthNames) <<
"------" << std::setw(columnWidthTypes) <<
"----"
2678 const auto nCols = columnNames.size();
2679 for (
auto i = 0u; i < nCols; i++) {
2680 auto origin =
"Dataset";
2681 if (definedColumnNamesSet.find(columnNames[i]) != definedColumnNamesSet.end())
2683 ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes)
2684 << columnTypes[i] << origin;
2730 for (
const auto &column : columns) {
2732 definedColumns.emplace_back(column.first);
2735 return definedColumns;
2776 const auto branchNamesEnd = branchNames.end();
2777 if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
2862 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2863 typename ArgTypes =
typename TTraits::CallableTraits<AccFun>::arg_types,
2864 typename ArgTypesNoDecay =
typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2865 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2866 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2869 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2875 auto accObjPtr = std::make_shared<U>(aggIdentity);
2876 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2878 auto action = std::make_unique<Action_t>(
2879 Helper_t(std::move(aggregator), std::move(merger), accObjPtr,
fLoopManager->
GetNSlots()), validColumnNames,
2881 return MakeResultPtr(accObjPtr, *
fLoopManager, std::move(action));
2897 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2898 typename ArgTypes =
typename TTraits::CallableTraits<AccFun>::arg_types,
2899 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2900 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2904 std::is_default_constructible<U>::value,
2905 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2906 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2954 using HelperT = std::decay_t<Helper>;
2957 static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value,
2958 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2960 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper));
2961 auto resPtr = hPtr->GetResultPtr();
2963 if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) {
2966 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
2996 template <
typename... ColumnTypes>
3000 auto newCols = columnList;
3001 newCols.insert(newCols.begin(),
"rdfentry_");
3002 auto displayer = std::make_shared<RDisplay>(newCols,
GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3003 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3006 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer));
3023 auto newCols = columnList;
3024 newCols.insert(newCols.begin(),
"rdfentry_");
3025 auto displayer = std::make_shared<RDisplay>(newCols,
GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3026 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3027 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(
3028 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), columnList.size() + 1);
3042 Display(std::string_view columnNameRegexp =
"",
size_t nRows = 5,
size_t nMaxCollectionElements = 10)
3046 return Display(selectedColumns, nRows, nMaxCollectionElements);
3057 Display(std::initializer_list<std::string> columnList,
size_t nRows = 5,
size_t nMaxCollectionElements = 10)
3060 return Display(selectedColumns, nRows, nMaxCollectionElements);
3067 const std::string entryColName =
"rdfentry_";
3068 const std::string entryColType =
"ULong64_t";
3069 auto entryColGen = [](
unsigned int,
ULong64_t entry) {
return entry; };
3070 using NewColEntry_t =
RDFDetail::RDefine<
decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
3072 auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
3077 const std::string slotColName =
"rdfslot_";
3078 const std::string slotColType =
"unsigned int";
3079 auto slotColGen = [](
unsigned int slot) {
return slot; };
3080 using NewColSlot_t =
RDFDetail::RDefine<
decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
3082 auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen),
ColumnNames_t{},
3092 std::vector<std::string> types;
3094 for (
auto column : columnList) {
3103 std::string error(callerName);
3104 error +=
" was called with ImplicitMT enabled, but multi-thread is not supported.";
3105 throw std::runtime_error(error);
3114 template <
typename ActionTag,
typename... ColTypes,
typename ActionResultType,
3115 typename HelperArgType = ActionResultType,
3116 std::enable_if_t<!RDFInternal::RNeedJitting<ColTypes...>::value,
int> = 0>
3119 const std::shared_ptr<HelperArgType> &helperArg,
const int = -1)
3121 constexpr auto nColumns =
sizeof...(ColTypes);
3128 auto action = RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots,
fProxiedPtr, ActionTag{},
3137 template <
typename ActionTag,
typename... ColTypes,
typename ActionResultType,
3138 typename HelperArgType = ActionResultType,
3139 std::enable_if_t<RDFInternal::RNeedJitting<ColTypes...>::value,
int> = 0>
3141 const std::shared_ptr<HelperArgType> &helperArg,
const int nColumns = -1)
3143 auto realNColumns = (nColumns > -1 ? nColumns :
sizeof...(ColTypes));
3149 auto *helperArgOnHeap = RDFInternal::MakeSharedOnHeap(helperArg);
3152 using BaseNodeType_t =
typename std::remove_pointer_t<
decltype(upcastNodeOnHeap)>::element_type;
3155 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(
3157 auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
3160 validColumnNames, upcastNodeOnHeap,
typeid(std::shared_ptr<HelperArgType>),
typeid(ActionTag), helperArgOnHeap,
3163 return MakeResultPtr(
r, *
fLoopManager, std::move(jittedAction));
3166 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
3171 if (where.compare(0, 8,
"Redefine") != 0) {
3180 using ArgTypes_t =
typename TTraits::CallableTraits<F>::arg_types;
3182 std::is_same<DefineType, RDFDetail::CustomColExtraArgs::Slot>::value, ArgTypes_t>
::type;
3184 std::is_same<DefineType, RDFDetail::CustomColExtraArgs::SlotAndEntry>::value, ColTypesTmp_t>
::type;
3186 constexpr auto nColumns = ColTypes_t::list_size;
3193 if (retTypeName.empty()) {
3197 retTypeName =
"CLING_UNKNOWN_TYPE_" + demangledType;
3201 auto newColumn = std::make_shared<NewCol_t>(
name, retTypeName, std::forward<F>(expression), validColumnNames,
3209 return newInterface;
3215 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
3216 bool IsFStringConv = std::is_convertible<F, std::string>::value,
3217 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
3218 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
3221 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
3222 "Error in `Define`: type returned by expression is not default-constructible");
3226 template <
typename... ColumnTypes>
3233 const auto validCols =
GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
3238 const auto &treename = parsedTreePath.fTreeName;
3239 const auto &dirname = parsedTreePath.fDirName;
3241 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
3242 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
3245 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, validCols);
3247 auto resPtr =
CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs);
3256 template <
typename... ColTypes, std::size_t... S>
3262 constexpr bool areCopyConstructible =
3263 RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value;
3264 static_assert(areCopyConstructible,
"Columns of a type which is not copy constructible cannot be cached yet.");
3268 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
3269 auto ds = std::make_unique<
RLazyDS<ColTypes...>>(
3270 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
3277 template <
typename Helper,
typename ActionResultType>
3279 const std::shared_ptr<Helper> &hPtr,
3283 return CreateAction<RDFInternal::ActionTags::Book>({}, resPtr, hPtr, 0u);
3286 template <
typename Helper,
typename ActionResultType,
typename... Others>
3287 RResultPtr<ActionResultType>
3289 const std::shared_ptr<Helper>& ,
3292 throw std::logic_error(std::string(
"An action was booked with no input columns, but the action requires "
3293 "columns! The action helper type was ") +
3314 template <
typename... ColumnTypes>
unsigned long long ULong64_t
The head node of a RDF computation graph.
ULong64_t GetNEmptyEntries() const
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
unsigned int GetNRuns() const
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
RDataSource * GetDataSource() const
unsigned int GetNSlots() const
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
Helper class that provides the operation graph nodes.
A RDataFrame node that produces a result.
A binder for user-defined columns and aliases.
void AddVariation(const std::shared_ptr< RVariationBase > &variation)
Register a new systematic variation.
const DefinesMap_t & GetColumns() const
Returns a map of pointers to the defined columns.
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
std::string ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
void AddColumn(const std::shared_ptr< RDFDetail::RDefineBase > &column)
Add a new booked column.
const VariationsMap_t & GetVariations() const
Returns the multimap of systematic variations, see fVariations.
ColumnNames_t GetNames() const
Returns the list of the names of the defined columns (Defines + Aliases).
void AddAlias(std::string_view alias, std::string_view colName)
Add a new alias to the ledger.
A DFDescription contains useful information about a given RDataFrame computation graph.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual bool HasColumn(std::string_view colName) const =0
Checks if the dataset has a certain column.
virtual std::string GetLabel()
Return a string representation of the datasource type.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
The public interface to the RDataFrame federation of classes.
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RVariationsDescription GetVariations()
Return a descriptor for the systematic variations registered in this branch of the computation graph.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action).
RLoopManager * GetLoopManager() const
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing column.
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing column.
std::string DescribeDataset() const
RResultPtr<::TGraph > Graph(std::string_view v1Name="", std::string_view v2Name="")
Fill and return a graph (lazy action).
RResultPtr< ActionResultType > CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &, const std::shared_ptr< Helper > &, Others...)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot.
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action).
std::enable_if_t< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > > DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
RInterface & operator=(RInterface &&)=default
Move-assignment operator for RInterface.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns using auto-generated tags.
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action).
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int nColumns=-1)
Create RAction object, return RResultPtr for the action. Overload for the case in which one or more co...
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Redefine(std::string_view name, std::string_view expression)
Overwrite the value and/or type of an existing column.
auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &resPtr, const std::shared_ptr< Helper > &hPtr, TTraits::TypeList< RDFDetail::RInferredType >) -> decltype(hPtr->Exec(0u), RResultPtr< ActionResultType >{})
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view fullTreeName, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int=-1)
Create RAction object, return RResultPtr for the action. Overload for the case in which all column typ...
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< RLoopManager > &proxied)
Build a RInterface from a RLoopManager.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, std::string_view expression)
Define a new column that is updated when the input sample changes.
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow referring to a column with a different name.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Redefine(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
RResultPtr< typename std::decay_t< Helper >::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RLoopManager * fLoopManager
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action).
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column using auto-generated variation tags.
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for one or more existing columns.
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Define a new column.
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action).
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
Register a systematic variation that affects multiple columns simultaneously.
RInterface< Proxied, DS_t > RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... >)
Implementation of cache.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action).
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action).
RResultPtr< std::decay_t< T > > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action).
std::enable_if_t<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > > DefineImpl(std::string_view, F, const ColumnNames_t &)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RInterface< Proxied, DS_t > RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot and the current entry.
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action).
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
Fill and return a two-dimensional profile (lazy action).
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNRuns() const
Gets the number of event loops run.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action).
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &columns, RDataSource *ds)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RDFDescription Describe()
Return information about the dataframe.
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action).
A RDataSource implementation which is built on top of result proxies.
Smart pointer for the return type of actions.
A descriptor for the systematic variations known to a given RDataFrame node.
ROOT's RDataFrame offers a high-level interface for analyses of data stored in TTree,...
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
A chain is a collection of files containing TTree objects.
TDirectory::TContext keeps track of and restores the current directory.
A TGraph is an object made of two arrays X and Y with npoints each.
Statistical variable, defined by its mean and variance (RMS).
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Vary call.
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
void CheckValidCppVarName(std::string_view var, const std::string &where)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
void RemoveDuplicates(ColumnNames_t &columnNames)
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &customColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
std::string PrettyPrintAddr(const void *const addr)
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
ColumnNames_t GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
std::string DemangleTypeIdName(const std::type_info &typeInfo)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting with '#' filtered out.
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &customCols, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(const std::vector< std::string > &branches, TTree *tree, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols)
Throw if the column has systematic variations attached.
void TriggerRun(ROOT::RDF::RNode &node)
Trigger the execution of an RDataFrame computation graph.
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &customCols, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
RFriendInfo GetFriendInfo(const TTree &tree)
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
std::vector< std::string > ColumnNames_t
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
ROOT type_traits extensions.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
type is TypeList if MustRemove is false, otherwise it is a TypeList with the first type removed
A collection of options to steer the creation of the dataset on file.
bool fLazy
Do not start the event loop when Snapshot is called.
A struct which stores the parameters of a TH1D.
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores the parameters of a TH2D.
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores the parameters of a TH3D.
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores the parameters of a THnD.
std::shared_ptr<::THnD > GetHistogram() const
A struct which stores the parameters of a TProfile.
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores the parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.