Logo ROOT   master
Reference Guide
RInterface.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RDF_TINTERFACE
12 #define ROOT_RDF_TINTERFACE
13 
14 #include "ROOT/RDataSource.hxx"
17 #include "ROOT/RDF/HistoModels.hxx"
19 #include "ROOT/RDF/RRange.hxx"
20 #include "ROOT/RDF/Utils.hxx"
22 #include "ROOT/RDF/RLazyDSImpl.hxx"
23 #include "ROOT/RResultPtr.hxx"
25 #include "ROOT/RStringView.hxx"
26 #include "ROOT/TypeTraits.hxx"
27 #include "RtypesCore.h" // for ULong64_t
28 #include "TH1.h" // For Histo actions
29 #include "TH2.h" // For Histo actions
30 #include "TH3.h" // For Histo actions
31 #include "TProfile.h"
32 #include "TProfile2D.h"
33 #include "TStatistic.h"
34 
35 #include <algorithm>
36 #include <cstddef>
37 #include <initializer_list>
38 #include <limits>
39 #include <memory>
40 #include <sstream>
41 #include <stdexcept>
42 #include <string>
43 #include <type_traits> // is_same, enable_if
44 #include <typeinfo>
45 #include <vector>
46 
47 class TGraph;
48 
49 // Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
50 namespace ROOT {
51 void DisableImplicitMT();
52 bool IsImplicitMTEnabled();
53 void EnableImplicitMT(UInt_t numthreads);
54 class RDataFrame;
55 namespace Internal {
56 namespace RDF {
57 class GraphCreatorHelper;
58 }
59 }
60 }
61 namespace cling {
62 std::string printValue(ROOT::RDataFrame *tdf);
63 }
64 
65 namespace ROOT {
66 namespace RDF {
67 namespace RDFDetail = ROOT::Detail::RDF;
69 namespace TTraits = ROOT::TypeTraits;
70 
71 template <typename Proxied, typename DataSource>
72 class RInterface;
73 
74 using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
75 
76 // clang-format off
77 /**
78  * \class ROOT::RDF::RInterface
79  * \ingroup dataframe
80  * \brief The public interface to the RDataFrame federation of classes
81  * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
82  * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
83  *
84  * The documentation of each method features a one liner illustrating how to use the method, for example showing how
85  * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
86  */
87 // clang-format on
88 template <typename Proxied, typename DataSource = void>
89 class RInterface {
90  using DS_t = DataSource;
91  using ColumnNames_t = RDFDetail::ColumnNames_t;
95  friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
96  friend class RDFInternal::GraphDrawing::GraphCreatorHelper;
97 
98  template <typename T, typename W>
99  friend class RInterface;
100 
101  std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
102  ///< The RLoopManager at the root of this computation graph. Never null.
104  /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
106 
107  /// Contains the custom columns defined up to this node.
109 
110 public:
111  ////////////////////////////////////////////////////////////////////////////
112  /// \brief Copy-assignment operator for RInterface.
// Defaulted copy-assignment: copying an RInterface copies the std::shared_ptr to the wrapped node (fProxiedPtr),
// so source and destination refer to the same graph node afterwards.
// NOTE(review): no move-assignment operator is declared in the visible part of this class; since a copy-assignment
// operator is user-declared, none is generated implicitly and assigning from an rvalue copies -- confirm intended.
113  RInterface &operator=(const RInterface &) = default;
114 
115  ////////////////////////////////////////////////////////////////////////////
116  /// \brief Copy-ctor for RInterface. The copy shares the wrapped node with the original.
117  RInterface(const RInterface &) = default;
118 
119  ////////////////////////////////////////////////////////////////////////////
120  /// \brief Move-ctor for RInterface. Defaulted: every data member is move-constructed.
121  RInterface(RInterface &&) = default;
122 
123  ////////////////////////////////////////////////////////////////////////////
124  /// \brief Only enabled when building a RInterface<RLoopManager>
// SFINAE: this constructor only participates in overload resolution when Proxied is RLoopManager.
// NOTE(review): this single-argument constructor is not `explicit`, so a std::shared_ptr<Proxied> converts
// implicitly to an RInterface -- confirm this is intended.
125  template <typename T = Proxied, typename std::enable_if<std::is_same<T, RLoopManager>::value, int>::type = 0>
126  RInterface(const std::shared_ptr<Proxied> &proxied)
// `proxied` is dereferenced below, so it must not be null. The proxied node is also used as the loop manager.
127  : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource())
128  {
// NOTE(review): the embedded listing numbers jump from 128 to 130; one line of this body is not visible here.
130  }
131 
132  ////////////////////////////////////////////////////////////////////////////
133  /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
134  /// Different RDataFrame methods return different C++ types. All nodes, however,
135  /// can be cast to this common type at the cost of a small performance penalty.
136  /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
137  /// around via (non-template, C++11) helper functions.
138  /// Example usage:
139  /// ~~~{.cpp}
140  /// // a function that conditionally adds a Range to a RDataFrame node.
141  /// RNode MaybeAddRange(RNode df, bool mustAddRange)
142  /// {
143  /// return mustAddRange ? df.Range(1) : df;
144  /// }
145  /// // use as :
146  /// ROOT::RDataFrame df(10);
147  /// auto maybeRanged = MaybeAddRange(df, true);
148  /// ~~~
149  /// Note that it is not a problem to pass RNode's by value.
150  operator RNode() const
151  {
152  return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fCustomColumns,
153  fDataSource);
154  }
155 
156  ////////////////////////////////////////////////////////////////////////////
157  /// \brief Append a filter to the call graph.
158  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
159  /// signalling whether the event has passed the selection (true) or not (false).
160  /// \param[in] columns Names of the columns/branches in input to the filter function.
161  /// \param[in] name Optional name of this filter. See `Report`.
162  /// \return the filter node of the computation graph.
163  ///
164  /// Append a filter node at the point of the call graph corresponding to the
165  /// object this method is called on.
166  /// The callable `f` should not have side-effects (e.g. modification of an
167  /// external or static variable) to ensure correct results when implicit
168  /// multi-threading is active.
169  ///
170  /// RDataFrame only evaluates filters when necessary: if multiple filters
171  /// are chained one after another, they are executed in order and the first
172  /// one returning false causes the event to be discarded.
173  /// Even if multiple actions or transformations depend on the same filter,
174  /// it is executed once per entry. If its result is requested more than
175  /// once, the cached result is served.
176  ///
177  /// ### Example usage:
178  /// ~~~{.cpp}
179  /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
180  /// auto filtered = df.Filter(myCut, {"x", "y"});
181  ///
182  /// // String: it must contain valid C++ except that column names can be used instead of variable names
183  /// auto filtered = df.Filter("x*y > 0");
184  /// ~~~
// SFINAE: callables convertible to std::string are excluded so that string expressions select the
// overload taking two string views instead of this template.
185  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
// NOTE(review): listing line 186 (presumably the return type of this overload) is absent from this view.
187  Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
188  {
189  RDFInternal::CheckFilter(f);
// The argument types of the callable determine how many input columns are expected.
190  using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
191  constexpr auto nColumns = ColTypes_t::list_size;
192  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
193  const auto newColumns =
194  CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
195 
196  using F_t = RDFDetail::RFilter<F, Proxied>;
197 
// The callable is moved into the new filter node, which keeps a reference to this node (fProxiedPtr).
198  auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, newColumns, name);
// The filter is registered with the loop manager through a raw pointer before ownership moves to the result.
199  fLoopManager->Book(filterPtr.get());
200  return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, newColumns, fDataSource);
201  }
202 
203  ////////////////////////////////////////////////////////////////////////////
204  /// \brief Append a filter to the call graph.
205  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
206  /// signalling whether the event has passed the selection (true) or not (false).
207  /// \param[in] name Optional name of this filter. See `Report`.
208  /// \return the filter node of the computation graph.
209  ///
210  /// Refer to the first overload of this method for the full documentation.
211  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
// NOTE(review): listing line 212 (the return type and parameter list of this overload) is absent from this
// view; judging by the body it takes a callable `f` and a filter `name`.
213  {
214  // The sfinae is there in order to pick up the overloaded method which accepts two strings
215  // rather than this template method.
// Forwards to the main overload with an empty column list (`{}`).
// NOTE(review): `f` is passed as an lvalue and therefore copied; if it is a by-value parameter,
// std::move(f) would avoid the copy -- confirm against the missing signature.
216  return Filter(f, {}, name);
217  }
218 
219  ////////////////////////////////////////////////////////////////////////////
220  /// \brief Append a filter to the call graph.
221  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
222  /// signalling whether the event has passed the selection (true) or not (false).
223  /// \param[in] columns Names of the columns/branches in input to the filter function.
224  /// \return the filter node of the computation graph.
225  ///
226  /// Refer to the first overload of this method for the full documentation.
227  template <typename F>
228  RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
229  {
230  return Filter(f, ColumnNames_t{columns});
231  }
232 
233  ////////////////////////////////////////////////////////////////////////////
234  /// \brief Append a filter to the call graph.
235  /// \param[in] expression The filter expression in C++
236  /// \param[in] name Optional name of this filter. See `Report`.
237  /// \return the filter node of the computation graph.
238  ///
239  /// The expression is just-in-time compiled and used to filter entries. It must
240  /// be valid C++ syntax in which variable names are substituted with the names
241  /// of branches/columns.
242  ///
243  /// ### Example usage:
244  /// ~~~{.cpp}
245  /// auto filtered_df = df.Filter("myCollection.size() > 3");
246  /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
247  /// ~~~
248  RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "")
249  {
250  // deleted by the jitted call to JitFilterHelper
// (upcastNodeOnHeap is a raw pointer to a heap-allocated smart pointer, as the remove_pointer/element_type
// alias below shows; it is not released in this function.)
251  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
252  using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
253  RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
// The jitted filter node is created here and registered with the loop manager further down.
254  const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(fLoopManager, name);
255 
256  RDFInternal::BookFilterJit(jittedFilter, upcastNodeOnHeap, name, expression, fLoopManager->GetAliasMap(),
// NOTE(review): listing line 257 (the remaining arguments of the BookFilterJit call) is absent from this view.
258 
259  fLoopManager->Book(jittedFilter.get());
// NOTE(review): listing line 260 (presumably the start of the return statement that ends with the
// `fDataSource);` line below) is absent from this view.
261  fDataSource);
262  }
263 
264  // clang-format off
265  ////////////////////////////////////////////////////////////////////////////
266  /// \brief Creates a custom column
267  /// \param[in] name The name of the custom column.
268  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
269  /// \param[in] columns Names of the columns/branches in input to the producer function.
270  /// \return the first node of the computation graph for which the new quantity is defined.
271  ///
272  /// Create a custom column that will be visible from all subsequent nodes
273  /// of the functional chain. The `expression` is only evaluated for entries that pass
274  /// all the preceding filters.
275  /// A new variable is created called `name`, accessible as if it was contained
276  /// in the dataset from subsequent transformations/actions.
277  ///
278  /// Use cases include:
279  /// * caching the results of complex calculations for easy and efficient multiple access
280  /// * extraction of quantities of interest from complex objects
281  ///
282  /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
283  ///
284  /// ### Example usage:
285  /// ~~~{.cpp}
286  /// // assuming a function with signature:
287  /// double myComplexCalculation(const RVec<float> &muon_pts);
288  /// // we can pass it directly to Define
289  /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
290  /// // alternatively, we can pass the body of the function as a string, as in Filter:
291  /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
292  /// ~~~
293  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
294  RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
295  {
296  return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns);
297  }
298  // clang-format on
299 
300  // clang-format off
301  ////////////////////////////////////////////////////////////////////////////
302  /// \brief Creates a custom column with a value dependent on the processing slot.
303  /// \param[in] name The name of the custom column.
304  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
305  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
306  /// \return the first node of the computation graph for which the new quantity is defined.
307  ///
308  /// This alternative implementation of `Define` is meant as a helper in writing thread-safe custom columns.
309  /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
310  /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
311  /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
312  /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
313  ///
314  /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
315  /// ~~~{.cpp}
316  /// int function(unsigned int, double, double);
317  /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
318  /// df.DefineSlot("x", function, {"column1", "column2"})
319  /// ~~~
320  ///
321  /// See Define for more information.
322  template <typename F>
323  RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
324  {
325  return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns);
326  }
327  // clang-format on
328 
329  // clang-format off
330  ////////////////////////////////////////////////////////////////////////////
331  /// \brief Creates a custom column with a value dependent on the processing slot and the current entry.
332  /// \param[in] name The name of the custom column.
333  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
334  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
335  /// \return the first node of the computation graph for which the new quantity is defined.
336  ///
337  /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
338  /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
339  /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
340  /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
341  /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
342  /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
343  ///
344  /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
345  /// ~~~{.cpp}
346  /// int function(unsigned int, ULong64_t, double, double);
347  /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
348  /// DefineSlotEntry("x", function, {"column1", "column2"})
349  /// ~~~
350  ///
351  /// See Define for more information.
352  template <typename F>
353  RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
354  {
355  return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns);
356  }
357  // clang-format on
358 
359  ////////////////////////////////////////////////////////////////////////////
360  /// \brief Creates a custom column
361  /// \param[in] name The name of the custom column.
362  /// \param[in] expression An expression in C++ which represents the temporary value
363  /// \return the first node of the computation graph for which the new quantity is defined.
364  ///
365  /// The expression is just-in-time compiled and used to produce the column entries.
366  /// It must be valid C++ syntax in which variable names are substituted with the names
367  /// of branches/columns.
368  ///
369  /// Refer to the first overload of this method for the full documentation.
370  RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression)
371  {
372  // this check must be done before jitting lest we throw exceptions in jitted code
// NOTE(review): listing lines 373-375 (the check the comment above refers to) are absent from this view.
376 
// The upcast node is allocated on the heap and handed to BookDefineJit; it is not released in this function.
377  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
378  auto jittedCustomColumn = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fCustomColumns,
379  fLoopManager->GetBranchNames(), upcastNodeOnHeap);
380 
// NOTE(review): listing line 381 (presumably the declaration of `newCols`) is absent from this view.
382  newCols.AddName(name);
383  newCols.AddColumn(jittedCustomColumn, name);
384 
// The returned interface wraps the same node (fProxiedPtr) as this one and receives the extended column set.
385  RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
386 
387  return newInterface;
388  }
389 
390  ////////////////////////////////////////////////////////////////////////////
391  /// \brief Allow referring to a column by a different name
392  /// \param[in] alias name of the column alias
393  /// \param[in] columnName name of the column to be aliased
394  /// \return the first node of the computation graph for which the alias is available.
395  ///
396  /// Aliasing an alias is supported.
397  ///
398  /// ### Example usage:
399  /// ~~~{.cpp}
400  /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
401  /// ~~~
402  RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName)
403  {
404  // The symmetry with Define is clear. We want to:
405  // - Create globally the alias and return this very node, unchanged
406  // - Make aliases accessible based on chains and not globally
407 
408  // Helper to find out if a name is a column
// Without a data source an empty list is used. One arm of the conditional is a temporary, so the reference
// binds to a (lifetime-extended) temporary produced by the conditional expression.
409  auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
410 
411  // If the alias name is a column name, there is a problem
// NOTE(review): listing line 412 (the start of the call whose trailing arguments appear below) is absent
// from this view.
413  fLoopManager->GetAliasMap(), dsColumnNames);
414 
// Validate the target column name; a single name is requested, hence the `[0]`.
415  const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
416 
// The alias is registered on the loop manager.
417  fLoopManager->AddColumnAlias(std::string(alias), validColumnName);
418 
// NOTE(review): listing line 419 (presumably the declaration of `newCols`) is absent from this view.
420 
421  newCols.AddName(alias);
// The returned interface wraps the same node (fProxiedPtr) as this one, with the alias added to its columns.
422  RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
423 
424  return newInterface;
425  }
426 
427  ////////////////////////////////////////////////////////////////////////////
428  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
429  /// \tparam ColumnTypes variadic list of branch/column types.
430  /// \param[in] treename The name of the output TTree.
431  /// \param[in] filename The name of the output TFile.
432  /// \param[in] columnList The list of names of the columns/branches to be written.
433  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
434  /// \return a `RDataFrame` that wraps the snapshotted dataset.
435  ///
436  /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
437  /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
438  /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
439  /// written out and it appears before the array in the columnList.
440  ///
441  /// ### Example invocations:
442  ///
443  /// ~~~{.cpp}
444  /// // without specifying template parameters (column types automatically deduced)
445  /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
446  ///
447  /// // specifying template parameters ("x" is `int`, "y" is `float`)
448  /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
449  /// ~~~
450  ///
451  /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
452  /// `RSnapshotOptions`:
453  /// ~~~{.cpp}
454  /// RSnapshotOptions opts;
455  /// opts.fLazy = true;
456  /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
457  /// ~~~
458  template <typename... ColumnTypes>
// NOTE(review): listing line 459 (presumably the return type of this overload) is absent from this view.
460  Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
461  const RSnapshotOptions &options = RSnapshotOptions())
462  {
// Thin wrapper: all arguments are forwarded unchanged to SnapshotImpl with the explicit column types.
463  return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
464  }
465 
466  ////////////////////////////////////////////////////////////////////////////
467  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
468  /// \param[in] treename The name of the output TTree.
469  /// \param[in] filename The name of the output TFile.
470  /// \param[in] columnList The list of names of the columns/branches to be written.
471  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
472  /// \return a `RDataFrame` that wraps the snapshotted dataset.
473  ///
474  /// This function returns a `RDataFrame` built with the output tree as a source.
475  /// The types of the columns are automatically inferred and do not need to be specified.
476  ///
477  /// See above for a more complete description and example usages.
478  RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
479  const ColumnNames_t &columnList,
480  const RSnapshotOptions &options = RSnapshotOptions())
481  {
482  // Early return: if the list of columns is empty, just return an empty RDF
483  // If we proceed, the jitted call will not compile!
// NOTE(review): this branch dereferences the result of Count() immediately and does not consult `options`
// (e.g. a lazy request) -- presumably this runs the event loop right away; confirm.
484  if (columnList.empty()) {
485  auto nEntries = *this->Count();
486  auto snapshotRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(nEntries));
487  return MakeResultPtr(snapshotRDF, *fLoopManager, nullptr);
488  }
489  std::stringstream snapCall;
490  auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
// NOTE(review): listing lines 491-492 are absent from this view; `upcastInterface`, used below, is presumably
// declared there.
493 
494  // build a string equivalent to
495  // "resPtr = (RInterface<nodetype*>*)(this)->Snapshot<Ts...>(args...)"
// NOTE(review): listing line 496 is absent from this view; `resPtr`, used below, is presumably declared there.
// The generated code refers to local objects (resPtr, upcastInterface, columnList, options) by their printed
// addresses, so it has to be executed before this function returns -- which InterpreterCalc below does.
497  snapCall << "*reinterpret_cast<ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>>*>("
498  << RDFInternal::PrettyPrintAddr(&resPtr)
499  << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
500  << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Snapshot<";
501 
502 
503  const auto validColumnNames = GetValidatedColumnNames(columnList.size(), columnList);
504  const auto colTypes = GetValidatedArgTypes(validColumnNames, fCustomColumns, fLoopManager->GetTree(), fDataSource,
505  "Snapshot", /*vector2rvec=*/false);
506 
// Emit the column types as the template arguments of the jitted Snapshot call.
507  for (auto &colType : colTypes)
508  snapCall << colType << ", ";
// Step the write position back over the trailing ", "; the text written next overwrites it.
509  if (!colTypes.empty())
510  snapCall.seekp(-2, snapCall.cur); // remove the last ",
// NOTE(review): treename and filename are inserted into the generated code without escaping; a name that
// contains a double quote or backslash would alter the generated code.
511  snapCall << ">(\"" << treename << "\", \"" << filename << "\", "
512  << "*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
513  << RDFInternal::PrettyPrintAddr(&columnList) << "),"
514  << "*reinterpret_cast<ROOT::RDF::RSnapshotOptions*>(" << RDFInternal::PrettyPrintAddr(&options) << "));";
515  // jit snapCall, return result
516  RDFInternal::InterpreterCalc(snapCall.str(), "Snapshot");
517  return resPtr;
518  }
519 
520  // clang-format off
521  ////////////////////////////////////////////////////////////////////////////
522  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
523  /// \param[in] treename The name of the output TTree.
524  /// \param[in] filename The name of the output TFile.
525  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A leading '^' and a trailing '$' are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
526  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
527  /// \return a `RDataFrame` that wraps the snapshotted dataset.
528  ///
529  /// This function returns a `RDataFrame` built with the output tree as a source.
530  /// The types of the columns are automatically inferred and do not need to be specified.
531  ///
532  /// See above for a more complete description and example usages.
533  RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
534  std::string_view columnNameRegexp = "",
535  const RSnapshotOptions &options = RSnapshotOptions())
536  {
// Turn the regular expression into a concrete column list, then defer to the overload taking a list.
// The trailing "Snapshot" argument names the calling method.
537  auto selectedColumns = RDFInternal::ConvertRegexToColumns(fCustomColumns,
// NOTE(review): listing line 538 (one argument of this call) is absent from this view.
539  fDataSource,
540  columnNameRegexp,
541  "Snapshot");
542  return Snapshot(treename, filename, selectedColumns, options);
543  }
544  // clang-format on
545 
546  // clang-format off
547  ////////////////////////////////////////////////////////////////////////////
548  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
549  /// \param[in] treename The name of the output TTree.
550  /// \param[in] filename The name of the output TFile.
551  /// \param[in] columnList The list of names of the columns/branches to be written.
552  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
553  /// \return a `RDataFrame` that wraps the snapshotted dataset.
554  ///
555  /// This function returns a `RDataFrame` built with the output tree as a source.
556  /// The types of the columns are automatically inferred and do not need to be specified.
557  ///
558  /// See above for a more complete description and example usages.
559  RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
560  std::initializer_list<std::string> columnList,
561  const RSnapshotOptions &options = RSnapshotOptions())
562  {
563  ColumnNames_t selectedColumns(columnList);
564  return Snapshot(treename, filename, selectedColumns, options);
565  }
566  // clang-format on
567 
568  ////////////////////////////////////////////////////////////////////////////
569  /// \brief Save selected columns in memory
570  /// \tparam ColumnTypes variadic list of branch/column types.
571  /// \param[in] columnList The columns to be cached in memory.
572  /// \return a `RDataFrame` that wraps the cached dataset.
573  ///
574  /// This action returns a new `RDataFrame` object, completely detached from
575  /// the originating `RDataFrame`. The new dataframe only contains the cached
576  /// columns and stores their content in memory for fast, zero-copy subsequent access.
577  ///
578  /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
579  /// fits in memory and that will be accessed many times.
580  ///
581  /// ### Example usage:
582  ///
583  /// **Types and columns specified:**
584  /// ~~~{.cpp}
585  /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
586  /// ~~~
587  ///
588  /// **Types inferred and columns specified (this invocation relies on jitting):**
589  /// ~~~{.cpp}
590  /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
591  /// ~~~
592  ///
593  /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
594  /// ~~~{.cpp}
595  /// auto cache_all_cols_df = df.Cache(myRegexp);
596  /// ~~~
597  template <typename... ColumnTypes>
// NOTE(review): listing line 598 (the return type and parameter list of this overload) is absent from this
// view; judging by the body it takes the column names as `columnList`.
599  {
// One index per explicitly specified column type, passed on to CacheImpl together with the column names.
600  auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
601  return CacheImpl<ColumnTypes...>(columnList, staticSeq);
602  }
603 
604  ////////////////////////////////////////////////////////////////////////////
605  /// \brief Save selected columns in memory
606  /// \param[in] columnList The columns to be cached in memory.
607  /// \return a `RDataFrame` that wraps the cached dataset.
608  ///
609  /// See the previous overloads for more information.
// NOTE(review): listing line 610 (the return type and parameter list of this overload) is absent from this
// view; judging by the body it takes the column names as `columnList`.
611  {
612  // Early return: if the list of columns is empty, just return an empty RDF
613  // If we proceed, the jitted call will not compile!
614  if (columnList.empty()) {
615  auto nEntries = *this->Count();
616  RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
617  return emptyRDF;
618  }
619 
620  std::stringstream cacheCall;
621  auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
// NOTE(review): listing lines 622-623 are absent from this view; `upcastInterface`, used below, is presumably
// declared there.
624  // build a string equivalent to
625  // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
// resRDF is a placeholder that the jitted code overwrites through its printed address.
626  RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
627  cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
628  << RDFInternal::PrettyPrintAddr(&resRDF)
629  << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
630  << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
631 
632  const auto validColumnNames = GetValidatedColumnNames(columnList.size(), columnList);
633  const auto colTypes = GetValidatedArgTypes(validColumnNames, fCustomColumns, fLoopManager->GetTree(), fDataSource,
634  "Cache", /*vector2rvec=*/false);
// Emit the column types as the template arguments of the jitted Cache call.
635  for (const auto &colType : colTypes)
636  cacheCall << colType << ", ";
// Step the write position back over the trailing ", "; the text written next overwrites it.
// NOTE(review): this condition is always true here because of the early return above; the corresponding
// code in Snapshot tests `colTypes.empty()` instead -- consider aligning the two.
637  if (!columnList.empty())
638  cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
639  cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
640  << RDFInternal::PrettyPrintAddr(&columnList) << "));";
641  // jit cacheCall, return result
642  RDFInternal::InterpreterCalc(cacheCall.str(), "Cache");
643  return resRDF;
644  }
645 
646  ////////////////////////////////////////////////////////////////////////////
647  /// \brief Save selected columns in memory
648  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A leading '^' and a trailing '$' are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
649  /// \return a `RDataFrame` that wraps the cached dataset.
650  ///
651  /// The existing columns are matched against the regular expression. If the string provided
652  /// is empty, all columns are selected. See the previous overloads for more information.
653  RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "")
654  {
655 
// NOTE(review): source line 656 is absent from this listing. It presumably opens the call whose trailing
// arguments (`columnNameRegexp, "Cache"`) appear on the next line and declares `selectedColumns`;
// confirm against the original header.
657  columnNameRegexp, "Cache");
// Forward the selected column names to another Cache overload.
658  return Cache(selectedColumns);
659  }
660 
661  ////////////////////////////////////////////////////////////////////////////
662  /// \brief Save selected columns in memory
663  /// \param[in] columnList Names of the columns to be cached in memory.
664  /// \return a `RDataFrame` that wraps the cached dataset.
665  ///
666  /// See the previous overloads for more information.
667  RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
668  {
669  ColumnNames_t selectedColumns(columnList);
670  return Cache(selectedColumns);
671  }
672 
673  // clang-format off
674  ////////////////////////////////////////////////////////////////////////////
675  /// \brief Creates a node that filters entries based on range: [begin, end)
676  /// \param[in] begin Initial entry number considered for this range.
677  /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
678  /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
679  /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
680  ///
681  /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
682  /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
683  ///
684  /// ### Example usage:
685  /// ~~~{.cpp}
686  /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
687  /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
688  /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
689  /// ~~~
690  // clang-format on
691  RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
692  {
693  // check invariants: stride must be non-zero and, unless end is 0, end must not be smaller than begin
694  if (stride == 0 || (end != 0 && end < begin))
695  throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
// Called with the name of this method; per the documentation above, ranges are only available
// when implicit multi-threading has not been enabled.
696  CheckIMTDisabled("Range");
697 
// NOTE(review): source line 698 is absent from this listing; the `Range_t` alias used below is
// presumably declared there — confirm against the original header.
699  auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
// Register the new range node with the loop manager.
700  fLoopManager->Book(rangePtr.get());
// NOTE(review): source line 701 is absent from this listing; `tdf_r`, returned below, is
// presumably constructed there — confirm against the original header.
702  return tdf_r;
703  }
704 
705  // clang-format off
706  ////////////////////////////////////////////////////////////////////////////
707  /// \brief Creates a node that filters entries based on range
708  /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
709  /// \return a node of the computation graph for which the range is defined.
710  ///
711  /// See the other Range overload for a detailed description.
712  // clang-format on
713  RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
714 
715  // clang-format off
716  ////////////////////////////////////////////////////////////////////////////
717  /// \brief Execute a user-defined function on each entry (*instant action*)
718  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
719  /// \param[in] columns Names of the columns/branches in input to the user function.
720  ///
721  /// The callable `f` is invoked once per entry. This is an *instant action*:
722  /// upon invocation, an event loop as well as execution of all scheduled actions
723  /// is triggered.
724  /// Users are responsible for the thread-safety of this callable when executing
725  /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
726  ///
727  /// ### Example usage:
728  /// ~~~{.cpp}
729  /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
730  /// ~~~
731  // clang-format on
732  template <typename F>
733  void Foreach(F f, const ColumnNames_t &columns = {})
734  {
// NOTE(review): source line 735 is absent from this listing; `arg_types`, used below, is
// presumably declared there — confirm against the original header.
// Return type of the user callable, obtained through CallableTraits.
736  using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
// Adapt `f` with AddSlotParameter and hand the result, together with the column names, to ForeachSlot.
737  ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
738  }
739 
740  // clang-format off
741  ////////////////////////////////////////////////////////////////////////////
742  /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*)
743  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
744  /// \param[in] columns Names of the columns/branches in input to the user function.
745  ///
746  /// Same as `Foreach`, but the user-defined function takes an extra
747  /// `unsigned int` as its first parameter, the *processing slot index*.
748  /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
749  /// for each thread of execution.
750  /// This is meant as a helper in writing thread-safe `Foreach`
751  /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
752  /// The user-defined processing callable is able to follow different
753  /// *streams of processing* indexed by the first parameter.
754  /// `ForeachSlot` works just as well with single-thread execution: in that
755  /// case `slot` will always be `0`.
756  ///
757  /// ### Example usage:
758  /// ~~~{.cpp}
759  /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
760  /// ~~~
761  // clang-format on
762  template <typename F>
763  void ForeachSlot(F f, const ColumnNames_t &columns = {})
764  {
// NOTE(review): source line 765 is absent from this listing; `ColTypes_t`, used below, is
// presumably declared there — confirm against the original header.
// Number of columns, taken from the size of the ColTypes_t type list.
766  constexpr auto nColumns = ColTypes_t::list_size;
767 
// Validate the user-provided column names against the expected number of columns.
768  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
769 
770  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
771 
772  using Helper_t = RDFInternal::ForeachSlotHelper<F>;
// NOTE(review): source line 773 is absent from this listing; `Action_t`, used below, is
// presumably declared there — confirm against the original header.
774 
// Build the action around a helper that takes ownership of `f`, then register it with the loop manager.
775  auto action =
776  std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, std::move(newColumns));
777  fLoopManager->Book(action.get());
778 
// Run the loop manager immediately: the documentation above describes this as an instant action.
779  fLoopManager->Run();
780  }
781 
782  // clang-format off
783  ////////////////////////////////////////////////////////////////////////////
784  /// \brief Execute a user-defined reduce operation on the values of a column.
785  /// \tparam F The type of the reduce callable. Automatically deduced.
786  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
787  /// \param[in] f A callable with signature `T(T,T)`
788  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
789  /// \return the reduced quantity wrapped in a `RResultPtr`.
790  ///
791  /// A reduction takes two values of a column and merges them into one (e.g.
792  /// by summing them, taking the maximum, etc). This action performs the
793  /// specified reduction operation on all processed column values, returning
794  /// a single value of the same type. The callable f must satisfy the general
795  /// requirements of a *processing function* besides having signature `T(T,T)`
796  /// where `T` is the type of column columnName.
797  ///
798  /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
799  /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
800  /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
801  /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
802  /// overload.
803  ///
804  /// ### Example usage:
805  /// ~~~{.cpp}
806  /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
807  /// ~~~
808  ///
809  /// This action is *lazy*: upon invocation of this method the calculation is
810  /// booked but not executed. See RResultPtr documentation.
811  // clang-format on
812  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
813  RResultPtr<T> Reduce(F f, std::string_view columnName = "")
814  {
815  static_assert(
816  std::is_default_constructible<T>::value,
817  "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
818  return Reduce(std::move(f), columnName, T());
819  }
820 
821  ////////////////////////////////////////////////////////////////////////////
822  /// \brief Execute a user-defined reduce operation on the values of a column.
823  /// \tparam F The type of the reduce callable. Automatically deduced.
824  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
825  /// \param[in] f A callable with signature `T(T,T)`
826  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
827  /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
828  /// \return the reduced quantity wrapped in a `RResultPtr`.
829  ///
830  /// ### Example usage:
831  /// ~~~{.cpp}
832  /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
833  /// ~~~
834  /// See the description of the first Reduce overload for more information.
835  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
836  RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
837  {
838  return Aggregate(f, f, columnName, redIdentity);
839  }
840 
841  ////////////////////////////////////////////////////////////////////////////
842  /// \brief Return the number of entries processed (*lazy action*)
843  /// \return the number of entries wrapped in a `RResultPtr`.
844  ///
845  /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
846  /// This action is *lazy*: upon invocation of this method the calculation is
847  /// booked but not executed. See RResultPtr documentation.
848  ///
849  /// ### Example usage:
850  /// ~~~{.cpp}
851  /// auto nEntriesAfterCuts = myFilteredDf.Count();
852  /// ~~~
853  ///
// NOTE(review): source line 854 is absent from this listing; it presumably holds the signature of the
// method documented above (the example calls it `Count`) — confirm against the original header.
855  {
// Number of slots reported by the loop manager; passed to the helper below.
856  const auto nSlots = fLoopManager->GetNSlots();
// Shared counter, initialised to 0, that is handed to the helper and wrapped in the returned result.
857  auto cSPtr = std::make_shared<ULong64_t>(0);
858  using Helper_t = RDFInternal::CountHelper;
// NOTE(review): source line 859 is absent from this listing; `Action_t`, used below, is
// presumably declared there — confirm against the original header.
860  auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
// NOTE(review): source line 861 is absent from this listing; it presumably holds the remaining
// argument(s) and the closing of the make_unique call above — confirm against the original header.
// Register the action with the loop manager, then wrap counter and action in the result.
862  fLoopManager->Book(action.get());
863  return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
864  }
865 
866  ////////////////////////////////////////////////////////////////////////////
867  /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default)
868  /// \tparam T The type of the column.
869  /// \tparam COLL The type of collection used to store the values.
870  /// \param[in] column The name of the column to collect the values of.
871  /// \return the content of the selected column wrapped in a `RResultPtr`.
872  ///
873  /// The collection type to be specified for C-style array columns is `RVec<T>`:
874  /// in this case the returned collection is a `std::vector<RVec<T>>`.
875  /// ### Example usage:
876  /// ~~~{.cpp}
877  /// // In this case intCol is a std::vector<int>
878  /// auto intCol = rdf.Take<int>("integerColumn");
879  /// // Same content as above but in this case taken as a RVec<int>
880  /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
881  /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
882  /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
883  /// ~~~
884  /// This action is *lazy*: upon invocation of this method the calculation is
885  /// booked but not executed. See RResultPtr documentation.
886  template <typename T, typename COLL = std::vector<T>>
887  RResultPtr<COLL> Take(std::string_view column = "")
888  {
// An empty `column` yields an empty list of names; otherwise the list holds just that name.
889  const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
890 
// Validate the names, requesting exactly one column.
891  const auto validColumnNames = GetValidatedColumnNames(1, columns);
892 
893  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<1>(), TTraits::TypeList<T>());
894 
895  using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
// NOTE(review): source line 896 is absent from this listing; `Action_t`, used below, is
// presumably declared there — confirm against the original header.
// Collection that is handed to the helper and wrapped in the returned result.
897  auto valuesPtr = std::make_shared<COLL>();
898  const auto nSlots = fLoopManager->GetNSlots();
899 
// Build the action, register it with the loop manager, and wrap collection and action in the result.
900  auto action =
901  std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, std::move(newColumns));
902  fLoopManager->Book(action.get());
903  return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
904  }
905 
906  ////////////////////////////////////////////////////////////////////////////
907  /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
908  /// \tparam V The type of the column used to fill the histogram.
909  /// \param[in] model The returned histogram will be constructed using this as a model.
910  /// \param[in] vName The name of the column that will fill the histogram.
911  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
912  ///
913  /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
914  /// is filled with each one of the elements of the container. In case multiple columns of container type
915  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
916  /// possibly different lengths between events).
917  /// This action is *lazy*: upon invocation of this method the calculation is
918  /// booked but not executed. See RResultPtr documentation.
919  ///
920  /// ### Example usage:
921  /// ~~~{.cpp}
922  /// // Deduce column type (this invocation needs jitting internally)
923  /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
924  /// // Explicit column type
925  /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
926  /// ~~~
927  ///
928  template <typename V = RDFDetail::RInferredType>
929  RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
930  {
931  const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
932 
933  const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
934 
935  std::shared_ptr<::TH1D> h(nullptr);
936  {
937  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
938  h = model.GetHistogram();
939  h->SetDirectory(nullptr);
940  }
941 
942  if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
943  RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
944  return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h);
945  }
946 
947  ////////////////////////////////////////////////////////////////////////////
948  /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
949  /// \tparam V The type of the column used to fill the histogram.
950  /// \param[in] vName The name of the column that will fill the histogram.
951  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
952  ///
953  /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
954  /// The "name" and "title" strings are built starting from the input column name.
955  /// See the description of the first Histo1D overload for more details.
956  ///
957  /// ### Example usage:
958  /// ~~~{.cpp}
959  /// // Deduce column type (this invocation needs jitting internally)
960  /// auto myHist1 = myDf.Histo1D("myColumn");
961  /// // Explicit column type
962  /// auto myHist2 = myDf.Histo1D<float>("myColumn");
963  /// ~~~
964  ///
965  template <typename V = RDFDetail::RInferredType>
966  RResultPtr<::TH1D> Histo1D(std::string_view vName)
967  {
968  const auto h_name = std::string(vName);
969  const auto h_title = h_name + ";" + h_name + ";count";
970  return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
971  }
972 
973  ////////////////////////////////////////////////////////////////////////////
974  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
975  /// \tparam V The type of the column used to fill the histogram.
976  /// \tparam W The type of the column used as weights.
977  /// \param[in] model The returned histogram will be constructed using this as a model.
978  /// \param[in] vName The name of the column that will fill the histogram.
979  /// \param[in] wName The name of the column that will provide the weights.
980  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
981  ///
982  /// See the description of the first Histo1D overload for more details.
983  ///
984  /// ### Example usage:
985  /// ~~~{.cpp}
986  /// // Deduce column type (this invocation needs jitting internally)
987  /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
988  /// // Explicit column type
989  /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
990  /// ~~~
991  ///
992  template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
993  RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
994  {
995  const std::vector<std::string_view> columnViews = {vName, wName};
996  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
997  ? ColumnNames_t()
998  : ColumnNames_t(columnViews.begin(), columnViews.end());
999  std::shared_ptr<::TH1D> h(nullptr);
1000  {
1001  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1002  h = model.GetHistogram();
1003  }
1004  return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h);
1005  }
1006 
1007  ////////////////////////////////////////////////////////////////////////////
1008  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1009  /// \tparam V The type of the column used to fill the histogram.
1010  /// \tparam W The type of the column used as weights.
1011  /// \param[in] vName The name of the column that will fill the histogram.
1012  /// \param[in] wName The name of the column that will provide the weights.
1013  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1014  ///
1015  /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1016  /// The "name" and "title" strings are built starting from the input column names.
1017  /// See the description of the first Histo1D overload for more details.
1018  ///
1019  /// ### Example usage:
1020  /// ~~~{.cpp}
1021  /// // Deduce column types (this invocation needs jitting internally)
1022  /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1023  /// // Explicit column types
1024  /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1025  /// ~~~
1026  ///
1027  template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1028  RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName)
1029  {
1030  // We build name and title based on the value and weight column names
1031  std::string str_vName{vName};
1032  std::string str_wName{wName};
1033  const auto h_name = str_vName + "_weighted_" + str_wName;
1034  const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1035  return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1036  }
1037 
1038  ////////////////////////////////////////////////////////////////////////////
1039  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1040  /// \tparam V The type of the column used to fill the histogram.
1041  /// \tparam W The type of the column used as weights.
1042  /// \param[in] model The returned histogram will be constructed using this as a model.
1043  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1044  ///
1045  /// This overload will use the first two default columns as column names.
1046  /// See the description of the first Histo1D overload for more details.
1047  template <typename V, typename W>
1048  RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1049  {
1050  return Histo1D<V, W>(model, "", "");
1051  }
1052 
1053  ////////////////////////////////////////////////////////////////////////////
1054  /// \brief Fill and return a two-dimensional histogram (*lazy action*)
1055  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1056  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1057  /// \param[in] model The returned histogram will be constructed using this as a model.
1058  /// \param[in] v1Name The name of the column that will fill the x axis.
1059  /// \param[in] v2Name The name of the column that will fill the y axis.
1060  /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1061  ///
1062  /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1063  /// is filled with each one of the elements of the container. In case multiple columns of container type
1064  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1065  /// possibly different lengths between events).
1066  /// This action is *lazy*: upon invocation of this method the calculation is
1067  /// booked but not executed. See RResultPtr documentation.
1068  ///
1069  /// ### Example usage:
1070  /// ~~~{.cpp}
1071  /// // Deduce column types (this invocation needs jitting internally)
1072  /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1073  /// // Explicit column types
1074  /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1075  /// ~~~
1076  ///
1077  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1078  RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1079  {
1080  std::shared_ptr<::TH2D> h(nullptr);
1081  {
1082  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1083  h = model.GetHistogram();
1084  }
1085  if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1086  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1087  }
1088  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1089  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1090  ? ColumnNames_t()
1091  : ColumnNames_t(columnViews.begin(), columnViews.end());
1092  return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h);
1093  }
1094 
1095  ////////////////////////////////////////////////////////////////////////////
1096  /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*)
1097  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1098  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1099  /// \tparam W The type of the column used for the weights of the histogram.
1100  /// \param[in] model The returned histogram will be constructed using this as a model.
1101  /// \param[in] v1Name The name of the column that will fill the x axis.
1102  /// \param[in] v2Name The name of the column that will fill the y axis.
1103  /// \param[in] wName The name of the column that will provide the weights.
1104  /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1105  ///
1106  /// This action is *lazy*: upon invocation of this method the calculation is
1107  /// booked but not executed. See RResultPtr documentation.
1108  /// The user gives up ownership of the model histogram.
1109  ///
1110  /// ### Example usage:
1111  /// ~~~{.cpp}
1112  /// // Deduce column types (this invocation needs jitting internally)
1113  /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1114  /// // Explicit column types
1115  /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1116  /// ~~~
1117  ///
1118  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1119  typename W = RDFDetail::RInferredType>
// NOTE(review): source line 1120 is absent from this listing; it presumably holds the return type
// of this overload — confirm against the original header.
1121  Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
1122  {
1123  std::shared_ptr<::TH2D> h(nullptr);
// Inner scope: the RIgnoreErrorLevelRAII object, constructed with kError, lives only while
// the histogram is created from the model.
1124  {
1125  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1126  h = model.GetHistogram();
1127  }
// Histograms without axis limits are rejected.
1128  if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1129  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1130  }
// If any of the three names is empty an empty column list is passed on; otherwise all three are used.
1131  const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1132  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1133  ? ColumnNames_t()
1134  : ColumnNames_t(columnViews.begin(), columnViews.end());
1135  return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h);
1136  }
1137 
1138  template <typename V1, typename V2, typename W>
// NOTE(review): source line 1139 is absent from this listing; it presumably holds the signature of this
// overload, including the `model` parameter used below — confirm against the original header.
1140  {
// Delegate to the weighted overload, passing empty strings for all three column names.
1141  return Histo2D<V1, V2, W>(model, "", "", "");
1142  }
1143 
1144  ////////////////////////////////////////////////////////////////////////////
1145  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1146  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1147  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1148  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1149  /// \param[in] model The returned histogram will be constructed using this as a model.
1150  /// \param[in] v1Name The name of the column that will fill the x axis.
1151  /// \param[in] v2Name The name of the column that will fill the y axis.
1152  /// \param[in] v3Name The name of the column that will fill the z axis.
1153  /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1154  ///
1155  /// This action is *lazy*: upon invocation of this method the calculation is
1156  /// booked but not executed. See RResultPtr documentation.
1157  ///
1158  /// ### Example usage:
1159  /// ~~~{.cpp}
1160  /// // Deduce column types (this invocation needs jitting internally)
1161  /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1162  /// "myValueX", "myValueY", "myValueZ");
1163  /// // Explicit column types
1164  /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1165  /// "myValueX", "myValueY", "myValueZ");
1166  /// ~~~
1167  ///
1168  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1169  typename V3 = RDFDetail::RInferredType>
1170  RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "",
1171  std::string_view v3Name = "")
1172  {
1173  std::shared_ptr<::TH3D> h(nullptr);
1174  {
1175  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1176  h = model.GetHistogram();
1177  }
1178  if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1179  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1180  }
1181  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1182  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1183  ? ColumnNames_t()
1184  : ColumnNames_t(columnViews.begin(), columnViews.end());
1185  return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h);
1186  }
1187 
1188  ////////////////////////////////////////////////////////////////////////////
1189  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1190  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1191  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1192  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1193  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1194  /// \param[in] model The returned histogram will be constructed using this as a model.
1195  /// \param[in] v1Name The name of the column that will fill the x axis.
1196  /// \param[in] v2Name The name of the column that will fill the y axis.
1197  /// \param[in] v3Name The name of the column that will fill the z axis.
1198  /// \param[in] wName The name of the column that will provide the weights.
1199  /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1200  ///
1201  /// This action is *lazy*: upon invocation of this method the calculation is
1202  /// booked but not executed. See RResultPtr documentation.
1203  ///
1204  /// ### Example usage:
1205  /// ~~~{.cpp}
1206  /// // Deduce column types (this invocation needs jitting internally)
1207  /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1208  /// "myValueX", "myValueY", "myValueZ", "myWeight");
1209  /// // Explicit column types
1210  /// using d_t = double;
1211  /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1212  /// "myValueX", "myValueY", "myValueZ", "myWeight");
1213  /// ~~~
1214  ///
1215  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1216  typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1217  RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name,
1218  std::string_view v3Name, std::string_view wName)
1219  {
1220  std::shared_ptr<::TH3D> h(nullptr);
1221  {
1222  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1223  h = model.GetHistogram();
1224  }
1225  if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1226  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1227  }
1228  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1229  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1230  ? ColumnNames_t()
1231  : ColumnNames_t(columnViews.begin(), columnViews.end());
1232  return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h);
1233  }
1234 
1235  template <typename V1, typename V2, typename V3, typename W>
// NOTE(review): source line 1236 is absent from this listing; it presumably holds the signature of this
// overload, including the `model` parameter used below — confirm against the original header.
1237  {
// Delegate to the weighted overload, passing empty strings for all four column names.
1238  return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1239  }
1240 
1241  ////////////////////////////////////////////////////////////////////////////
1242  /// \brief Fill and return a graph (*lazy action*)
1243  /// \tparam V1 The type of the column used to fill the x axis of the graph.
1244  /// \tparam V2 The type of the column used to fill the y axis of the graph.
1245  /// \param[in] v1Name The name of the column that will fill the x axis.
1246  /// \param[in] v2Name The name of the column that will fill the y axis.
1247  /// \return the graph wrapped in a `RResultPtr`.
1248  ///
1249  /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1250  /// is filled with each one of the elements of the container.
1251  /// If Multithreading is enabled, the order in which points are inserted is undefined.
1252  /// If the graph has to be drawn, the user is advised to sort it on the x axis before printing.
1253  /// A name and a title are given to the graph based on the input column names.
1254  ///
1255  /// This action is *lazy*: upon invocation of this method the calculation is
1256  /// booked but not executed. See RResultPtr documentation.
1257  ///
1258  /// ### Example usage:
1259  /// ~~~{.cpp}
1260  /// // Deduce column types (this invocation needs jitting internally)
1261  /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1262  /// // Explicit column types
1263  /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1264  /// ~~~
1265  ///
1266  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1267  RResultPtr<::TGraph> Graph(std::string_view v1Name = "", std::string_view v2Name = "")
1268  {
1269  auto graph = std::make_shared<::TGraph>();
1270  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1271  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1272  ? ColumnNames_t()
1273  : ColumnNames_t(columnViews.begin(), columnViews.end());
1274 
1275  const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1276 
1277  // We build a default name and title based on the input columns
1278  if (!(validatedColumns[0].empty() && validatedColumns[1].empty())) {
1279  const auto g_name = std::string(v1Name) + "_vs_" + std::string(v2Name);
1280  const auto g_title = std::string(v1Name) + " vs " + std::string(v2Name);
1281  graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1282  graph->GetXaxis()->SetTitle(std::string(v1Name).c_str());
1283  graph->GetYaxis()->SetTitle(std::string(v2Name).c_str());
1284  }
1285 
1286  return CreateAction<RDFInternal::ActionTags::Graph, V1, V2>(validatedColumns, graph);
1287  }
1288 
1289  ////////////////////////////////////////////////////////////////////////////
1290  /// \brief Fill and return a one-dimensional profile (*lazy action*)
1291  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1292  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1293  /// \param[in] model The model to be considered to build the new return value.
1294  /// \param[in] v1Name The name of the column that will fill the x axis.
1295  /// \param[in] v2Name The name of the column that will fill the y axis.
1296  /// \return the monodimensional profile wrapped in a `RResultPtr`.
1297  ///
1298  /// This action is *lazy*: upon invocation of this method the calculation is
1299  /// booked but not executed. See RResultPtr documentation.
1300  ///
1301  /// ### Example usage:
1302  /// ~~~{.cpp}
1303  /// // Deduce column types (this invocation needs jitting internally)
1304  /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1305  /// // Explicit column types
1306  /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1307  /// ~~~
1308  ///
1309  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
      // NOTE(review): the return-type line (listing line 1310) is absent from this extract.
1311  Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1312  {
1313  std::shared_ptr<::TProfile> h(nullptr);
1314  {
      // The profile is obtained from the model while an RIgnoreErrorLevelRAII(kError) guard is in scope.
1315  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1316  h = model.GetProfile();
1317  }
1318 
      // Reject profiles for which HasAxisLimits returns false.
1319  if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1320  throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1321  }
      // If AtLeastOneEmptyString reports true, an empty column list is forwarded to CreateAction.
1322  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1323  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1324  ? ColumnNames_t()
1325  : ColumnNames_t(columnViews.begin(), columnViews.end());
1326  return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h);
1327  }
1328 
1329  ////////////////////////////////////////////////////////////////////////////
1330  /// \brief Fill and return a one-dimensional profile (*lazy action*)
1331  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1332  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1333  /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1334  /// \param[in] model The model to be considered to build the new return value.
1335  /// \param[in] v1Name The name of the column that will fill the x axis.
1336  /// \param[in] v2Name The name of the column that will fill the y axis.
1337  /// \param[in] wName The name of the column that will provide the weights.
1338  /// \return the monodimensional profile wrapped in a `RResultPtr`.
1339  ///
1340  /// This action is *lazy*: upon invocation of this method the calculation is
1341  /// booked but not executed. See RResultPtr documentation.
1342  ///
1343  /// ### Example usage:
1344  /// ~~~{.cpp}
1345  /// // Deduce column types (this invocation needs jitting internally)
1346  /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
1347  /// // Explicit column types
1348  /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
1349  /// "xValues", "yValues", "weight");
1350  /// ~~~
1351  ///
1352  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1353  typename W = RDFDetail::RInferredType>
      // NOTE(review): the return-type line (listing line 1354) is absent from this extract.
1355  Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
1356  {
1357  std::shared_ptr<::TProfile> h(nullptr);
1358  {
      // The profile is obtained from the model while an RIgnoreErrorLevelRAII(kError) guard is in scope.
1359  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1360  h = model.GetProfile();
1361  }
1362 
      // Reject profiles for which HasAxisLimits returns false.
1363  if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1364  throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1365  }
      // Three names here (x, y, weight); if AtLeastOneEmptyString reports true, an empty list is forwarded.
1366  const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1367  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1368  ? ColumnNames_t()
1369  : ColumnNames_t(columnViews.begin(), columnViews.end());
1370  return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h);
1371  }
1372 
1373  template <typename V1, typename V2, typename W>
      // NOTE(review): the declaration line (listing line 1374) is absent from this extract.
1375  {
      // Forward to the four-argument Profile1D overload, passing an empty string for every column name.
1376  return Profile1D<V1, V2, W>(model, "", "", "");
1377  }
1378 
1379  ////////////////////////////////////////////////////////////////////////////
1380  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1381  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1382  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1383  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1384  /// \param[in] model The returned profile will be constructed using this as a model.
1385  /// \param[in] v1Name The name of the column that will fill the x axis.
1386  /// \param[in] v2Name The name of the column that will fill the y axis.
1387  /// \param[in] v3Name The name of the column that will fill the z axis.
1388  /// \return the bidimensional profile wrapped in a `RResultPtr`.
1389  ///
1390  /// This action is *lazy*: upon invocation of this method the calculation is
1391  /// booked but not executed. See RResultPtr documentation.
1392  ///
1393  /// ### Example usage:
1394  /// ~~~{.cpp}
1395  /// // Deduce column types (this invocation needs jitting internally)
1396  /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1397  /// "xValues", "yValues", "zValues");
1398  /// // Explicit column types
1399  /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1400  /// "xValues", "yValues", "zValues");
1401  /// ~~~
1402  ///
1403  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1404  typename V3 = RDFDetail::RInferredType>
1405  RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "",
1406  std::string_view v2Name = "", std::string_view v3Name = "")
1407  {
1408  std::shared_ptr<::TProfile2D> h(nullptr);
1409  {
1410  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1411  h = model.GetProfile();
1412  }
1413 
1414  if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1415  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1416  }
1417  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1418  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1419  ? ColumnNames_t()
1420  : ColumnNames_t(columnViews.begin(), columnViews.end());
1421  return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h);
1422  }
1423 
1424  ////////////////////////////////////////////////////////////////////////////
1425  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1426  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1427  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1428  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1429  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1430  /// \param[in] model The returned histogram will be constructed using this as a model.
1431  /// \param[in] v1Name The name of the column that will fill the x axis.
1432  /// \param[in] v2Name The name of the column that will fill the y axis.
1433  /// \param[in] v3Name The name of the column that will fill the z axis.
1434  /// \param[in] wName The name of the column that will provide the weights.
1435  /// \return the bidimensional profile wrapped in a `RResultPtr`.
1436  ///
1437  /// This action is *lazy*: upon invocation of this method the calculation is
1438  /// booked but not executed. See RResultPtr documentation.
1439  ///
1440  /// ### Example usage:
1441  /// ~~~{.cpp}
1442  /// // Deduce column types (this invocation needs jitting internally)
1443  /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1444  /// "xValues", "yValues", "zValues", "weight");
1445  /// // Explicit column types
1446  /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1447  /// "xValues", "yValues", "zValues", "weight");
1448  /// ~~~
1449  ///
1450  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1451  typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1452  RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name,
1453  std::string_view v3Name, std::string_view wName)
1454  {
1455  std::shared_ptr<::TProfile2D> h(nullptr);
1456  {
1457  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1458  h = model.GetProfile();
1459  }
1460 
1461  if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1462  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1463  }
1464  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1465  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1466  ? ColumnNames_t()
1467  : ColumnNames_t(columnViews.begin(), columnViews.end());
1468  return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h);
1469  }
1470 
1471  template <typename V1, typename V2, typename V3, typename W>
      // NOTE(review): the declaration line (listing line 1472) is absent from this extract.
1473  {
      // Forward to the five-argument Profile2D overload, passing an empty string for every column name.
1474  return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
1475  }
1476 
1477  ////////////////////////////////////////////////////////////////////////////
1478  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1479  ///
1480  /// T must be a type that provides a copy- or move-constructor and a `T::Fill` method that takes as many arguments
1481  /// as there are column names in columnList. The arguments of `T::Fill` must have the same types as the
1482  /// specified columns (these types are passed as template parameters to this method).
1483  /// \tparam FirstColumn The first type of the column the values of which are used to fill the object.
1484  /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1485  /// \tparam T The type of the object to fill. Automatically deduced.
1486  /// \param[in] model The model to be considered to build the new return value.
1487  /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1488  /// \return the filled object wrapped in a `RResultPtr`.
1489  ///
1490  /// The user gives up ownership of the model object.
1491  /// The list of column names to be used for filling must always be specified.
1492  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1493  /// See RResultPtr documentation.
1494  ///
1495  /// ### Example usage:
1496  /// ~~~{.cpp}
1497  /// MyClass obj;
1498  /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
1499  /// ~~~
1500  ///
1501  template <typename FirstColumn, typename... OtherColumns, typename T> // need FirstColumn to disambiguate overloads
1502  RResultPtr<T> Fill(T &&model, const ColumnNames_t &columnList)
1503  {
1504  auto h = std::make_shared<T>(std::forward<T>(model));
1505  if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1506  throw std::runtime_error("The absence of axes limits is not supported yet.");
1507  }
1508  return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h);
1509  }
1510 
1511  ////////////////////////////////////////////////////////////////////////////
1512  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1513  ///
1514  /// This overload infers the types of the columns specified in columnList at runtime and just-in-time compiles the
1515  /// method with these types. See previous overload for more information.
1516  /// \tparam T The type of the object to fill. Automatically deduced.
1517  /// \param[in] model The model to be considered to build the new return value.
1518  /// \param[in] columnList The name of the columns read to fill the object.
1519  /// \return the filled object wrapped in a `RResultPtr`.
1520  ///
1521  /// This overload of `Fill` infers the type of the specified columns at runtime and just-in-time compiles the
1522  /// previous overload. Check the previous overload for more details on `Fill`.
1523  ///
1524  /// ### Example usage:
1525  /// ~~~{.cpp}
1526  /// MyClass obj;
1527  /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
1528  /// ~~~
1529  ///
1530  template <typename T>
1531  RResultPtr<T> Fill(T &&model, const ColumnNames_t &bl)
1532  {
1533  auto h = std::make_shared<T>(std::forward<T>(model));
1534  if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1535  throw std::runtime_error("The absence of axes limits is not supported yet.");
1536  }
1537  return CreateAction<RDFInternal::ActionTags::Fill, RDFDetail::RInferredType>(bl, h, bl.size());
1538  }
1539 
1540  ////////////////////////////////////////////////////////////////////////////
1541  /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1542  ///
1543  /// \tparam V The type of the value column
1544  /// \param[in] value The name of the column with the values to fill the statistics with.
1545  /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1546  ///
1547  /// ### Example usage:
1548  /// ~~~{.cpp}
1549  /// // Deduce column type (this invocation needs jitting internally)
1550  /// auto stats0 = myDf.Stats("values");
1551  /// // Explicit column type
1552  /// auto stats1 = myDf.Stats<float>("values");
1553  /// ~~~
1554  ///
1555  template<typename V = RDFDetail::RInferredType>
1556  RResultPtr<TStatistic> Stats(std::string_view value = "")
1557  {
1558  ColumnNames_t columns;
1559  if (!value.empty()) {
1560  columns.emplace_back(std::string(value));
1561  }
1562  const auto validColumnNames = GetValidatedColumnNames(1, columns);
1563  if (std::is_same<V, RDFDetail::RInferredType>::value) {
1564  return Fill(TStatistic(), validColumnNames);
1565  }
1566  else {
1567  return Fill<V>(TStatistic(), validColumnNames);
1568  }
1569  }
1570 
1571  ////////////////////////////////////////////////////////////////////////////
1572  /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1573  ///
1574  /// \tparam V The type of the value column
1575  /// \tparam W The type of the weight column
1576  /// \param[in] value The name of the column with the values to fill the statistics with.
1577  /// \param[in] weight The name of the column with the weights to fill the statistics with.
1578  /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1579  ///
1580  /// ### Example usage:
1581  /// ~~~{.cpp}
1582  /// // Deduce column types (this invocation needs jitting internally)
1583  /// auto stats0 = myDf.Stats("values", "weights");
1584  /// // Explicit column types
1585  /// auto stats1 = myDf.Stats<int, float>("values", "weights");
1586  /// ~~~
1587  ///
1588  template<typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1589  RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight)
1590  {
1591  ColumnNames_t columns {std::string(value), std::string(weight)};
1592  constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
1593  constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
1594  const auto validColumnNames = GetValidatedColumnNames(2, columns);
1595  // We have 3 cases:
1596  // 1. Both types are inferred: we use Fill and let the jit kick in.
1597  // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
1598  // 3. Both types are explicit: we invoke the fully compiled Fill method.
1599  if (vIsInferred && wIsInferred) {
1600  return Fill(TStatistic(), validColumnNames);
1601  } else if (vIsInferred != wIsInferred) {
1602  std::string error("The ");
1603  error += vIsInferred ? "value " : "weight ";
1604  error += "column type is explicit, while the ";
1605  error += vIsInferred ? "weight " : "value ";
1606  error += " is specified to be inferred. This case is not supported: please specify both types or none.";
1607  throw std::runtime_error(error);
1608  } else {
1609  return Fill<V, W>(TStatistic(), validColumnNames);
1610  }
1611  }
1612 
1613  ////////////////////////////////////////////////////////////////////////////
1614  /// \brief Return the minimum of processed column values (*lazy action*)
1615  /// \tparam T The type of the branch/column.
1616  /// \param[in] columnName The name of the branch/column to be treated.
1617  /// \return the minimum value of the selected column wrapped in a `RResultPtr`.
1618  ///
1619  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1620  /// template specialization of this method.
1621  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1622  ///
1623  /// This action is *lazy*: upon invocation of this method the calculation is
1624  /// booked but not executed. See RResultPtr documentation.
1625  ///
1626  /// ### Example usage:
1627  /// ~~~{.cpp}
1628  /// // Deduce column type (this invocation needs jitting internally)
1629  /// auto minVal0 = myDf.Min("values");
1630  /// // Explicit column type
1631  /// auto minVal1 = myDf.Min<double>("values");
1632  /// ~~~
1633  ///
1634  template <typename T = RDFDetail::RInferredType>
1635  RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "")
1636  {
1637  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1638  using RetType_t = RDFDetail::MinReturnType_t<T>;
1639  auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
1640  return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV);
1641  }
1642 
1643  ////////////////////////////////////////////////////////////////////////////
1644  /// \brief Return the maximum of processed column values (*lazy action*)
1645  /// \tparam T The type of the branch/column.
1646  /// \param[in] columnName The name of the branch/column to be treated.
1647  /// \return the maximum value of the selected column wrapped in a `RResultPtr`.
1648  ///
1649  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1650  /// template specialization of this method.
1651  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1652  ///
1653  /// This action is *lazy*: upon invocation of this method the calculation is
1654  /// booked but not executed. See RResultPtr documentation.
1655  ///
1656  /// ### Example usage:
1657  /// ~~~{.cpp}
1658  /// // Deduce column type (this invocation needs jitting internally)
1659  /// auto maxVal0 = myDf.Max("values");
1660  /// // Explicit column type
1661  /// auto maxVal1 = myDf.Max<double>("values");
1662  /// ~~~
1663  ///
1664  template <typename T = RDFDetail::RInferredType>
1665  RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "")
1666  {
1667  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1668  using RetType_t = RDFDetail::MaxReturnType_t<T>;
1669  auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
1670  return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV);
1671  }
1672 
1673  ////////////////////////////////////////////////////////////////////////////
1674  /// \brief Return the mean of processed column values (*lazy action*)
1675  /// \tparam T The type of the branch/column.
1676  /// \param[in] columnName The name of the branch/column to be treated.
1677  /// \return the mean value of the selected column wrapped in a `RResultPtr`.
1678  ///
1679  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1680  /// template specialization of this method.
1681  ///
1682  /// This action is *lazy*: upon invocation of this method the calculation is
1683  /// booked but not executed. See RResultPtr documentation.
1684  ///
1685  /// ### Example usage:
1686  /// ~~~{.cpp}
1687  /// // Deduce column type (this invocation needs jitting internally)
1688  /// auto meanVal0 = myDf.Mean("values");
1689  /// // Explicit column type
1690  /// auto meanVal1 = myDf.Mean<double>("values");
1691  /// ~~~
1692  ///
1693  template <typename T = RDFDetail::RInferredType>
1694  RResultPtr<double> Mean(std::string_view columnName = "")
1695  {
1696  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1697  auto meanV = std::make_shared<double>(0);
1698  return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV);
1699  }
1700 
1701  ////////////////////////////////////////////////////////////////////////////
1702  /// \brief Return the unbiased standard deviation of processed column values (*lazy action*)
1703  /// \tparam T The type of the branch/column.
1704  /// \param[in] columnName The name of the branch/column to be treated.
1705  /// \return the standard deviation value of the selected column wrapped in a `RResultPtr`.
1706  ///
1707  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1708  /// template specialization of this method.
1709  ///
1710  /// This action is *lazy*: upon invocation of this method the calculation is
1711  /// booked but not executed. See RResultPtr documentation.
1712  ///
1713  /// ### Example usage:
1714  /// ~~~{.cpp}
1715  /// // Deduce column type (this invocation needs jitting internally)
1716  /// auto stdDev0 = myDf.StdDev("values");
1717  /// // Explicit column type
1718  /// auto stdDev1 = myDf.StdDev<double>("values");
1719  /// ~~~
1720  ///
1721  template <typename T = RDFDetail::RInferredType>
1722  RResultPtr<double> StdDev(std::string_view columnName = "")
1723  {
1724  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1725  auto stdDeviationV = std::make_shared<double>(0);
1726  return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV);
1727  }
1728 
1729  // clang-format off
1730  ////////////////////////////////////////////////////////////////////////////
1731  /// \brief Return the sum of processed column values (*lazy action*)
1732  /// \tparam T The type of the branch/column.
1733  /// \param[in] columnName The name of the branch/column.
1734  /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1735  /// \return the sum of the selected column wrapped in a `RResultPtr`.
1736  ///
1737  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1738  /// template specialization of this method.
1739  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1740  ///
1741  /// This action is *lazy*: upon invocation of this method the calculation is
1742  /// booked but not executed. See RResultPtr documentation.
1743  ///
1744  /// ### Example usage:
1745  /// ~~~{.cpp}
1746  /// // Deduce column type (this invocation needs jitting internally)
1747  /// auto sum0 = myDf.Sum("values");
1748  /// // Explicit column type
1749  /// auto sum1 = myDf.Sum<double>("values");
1750  /// ~~~
1751  ///
1752  template <typename T = RDFDetail::RInferredType>
      // NOTE(review): the return-type line (listing line 1753) is absent from this extract.
1754  Sum(std::string_view columnName = "",
1755  const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
1756  {
      // An empty columnName results in an empty column list being forwarded to CreateAction.
1757  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
      // The shared result object is constructed from initValue.
1758  auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
1759  return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV);
1760  }
1761  // clang-format on
1762 
1763  ////////////////////////////////////////////////////////////////////////////
1764  /// \brief Gather filtering statistics
1765  /// \return the resulting `RCutFlowReport` instance wrapped in a `RResultPtr`.
1766  ///
1767  /// Calling `Report` on the main `RDataFrame` object gathers stats for
1768  /// all named filters in the call graph. Calling this method on a
1769  /// stored chain state (i.e. a graph node different from the first) gathers
1770  /// the stats for all named filters in the chain section between the original
1771  /// `RDataFrame` and that node (included). Stats are gathered in the same
1772  /// order as the named filters have been added to the graph.
1773  /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
1774  /// effects cuts had.
1775  ///
1776  /// This action is *lazy*: upon invocation of
1777  /// this method the calculation is booked but not executed. See RResultPtr
1778  /// documentation.
1779  ///
1780  /// ### Example usage:
1781  /// ~~~{.cpp}
1782  /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
1783  /// auto cutReport = filtered.Report();
1784  /// cutReport->Print();
1785  /// ~~~
1786  ///
      // NOTE(review): listing lines 1787 (the declaration) and 1803 (the end of the make_unique
      // call below) are absent from this extract.
1788  {
1789  bool returnEmptyReport = false;
1790  // if this is a RInterface<RLoopManager> on which `Define` has been called, users
1791  // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
1792  // certainly does not contain named filters.
1793  // The number 4 takes into account the implicit columns for entry and slot number
1794  // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
1795  if (std::is_same<Proxied, RLoopManager>::value && fCustomColumns.GetNames().size() > 4)
1796  returnEmptyReport = true;
1797 
      // `rep` is the report object that is both handed to the helper and wrapped in the returned result.
1798  auto rep = std::make_shared<RCutFlowReport>();
1799  using Helper_t = RDFInternal::ReportHelper<Proxied>;
1800  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1801 
1802  auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
1804 
      // The action is booked on the loop manager through its raw pointer before the
      // unique_ptr itself is moved into MakeResultPtr.
1805  fLoopManager->Book(action.get());
1806  return MakeResultPtr(rep, *fLoopManager, std::move(action));
1807  }
1808 
1809  /////////////////////////////////////////////////////////////////////////////
1810  /// \brief Returns the names of the available columns
1811  /// \return the container of column names.
1812  ///
1813  /// This is not an action nor a transformation, just a query to the RDataFrame object.
1814  ///
1815  /// ### Example usage:
1816  /// ~~~{.cpp}
1817  /// auto colNames = d.GetColumnNames();
1818  /// // Print columns' names
1819  /// for (auto &&colName : colNames) std::cout << colName << std::endl;
1820  /// ~~~
1821  ///
      // NOTE(review): the declaration line (listing line 1822) is absent from this extract.
1823  {
1824  ColumnNames_t allColumns;
1825 
      // Appends a name to allColumns unless IsInternalColumn flags it.
1826  auto addIfNotInternal = [&allColumns](std::string_view colName) {
1827  if (!RDFInternal::IsInternalColumn(colName))
1828  allColumns.emplace_back(colName);
1829  };
1830 
      // 1) names known to fCustomColumns, filtered through addIfNotInternal
1831  auto columnNames = fCustomColumns.GetNames();
1832 
1833  std::for_each(columnNames.begin(), columnNames.end(), addIfNotInternal);
1834 
      // 2) branch names of the loop manager's tree, when there is one
1835  auto tree = fLoopManager->GetTree();
1836  if (tree) {
1837  auto branchNames = RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false);
1838  allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
1839  }
1840 
      // 3) column names reported by the data source, when there is one
1841  if (fDataSource) {
1842  auto &dsColNames = fDataSource->GetColumnNames();
1843  allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end());
1844  }
1845 
1846  return allColumns;
1847  }
1848 
1849  /////////////////////////////////////////////////////////////////////////////
1850  /// \brief Return the type of a given column as a string.
1851  /// \return the type of the required column.
1852  ///
1853  /// This is not an action nor a transformation, just a query to the RDataFrame object.
1854  ///
1855  /// ### Example usage:
1856  /// ~~~{.cpp}
1857  /// auto colType = d.GetColumnType("columnName");
1858  /// // Print column type
1859  /// std::cout << "Column columnName has type " << colType << std::endl;
1860  /// ~~~
1861  ///
1862  std::string GetColumnType(std::string_view column)
1863  {
1864  const auto col = std::string(column);
1865  const bool convertVector2RVec = true;
      // customCol is the object fCustomColumns holds under this name, or nullptr when
      // fCustomColumns has no such name.
1866  RDFDetail::RCustomColumnBase *customCol =
1867  fCustomColumns.HasName(column) ? fCustomColumns.GetColumns().at(col).get() : nullptr;
      // NOTE(review): listing line 1868 (the start of the return statement whose argument
      // list ends on the next line) is absent from this extract.
1869  customCol, convertVector2RVec);
1870  }
1871 
1872  /// \brief Returns the names of the filters created.
1873  /// \return the container of filters names.
1874  ///
1875  /// If called on a root node, all the filters in the computation graph will
1876  /// be printed. For any other node, only the filters upstream of that node.
1877  /// Filters without a name are printed as "Unnamed Filter"
1878  /// This is not an action nor a transformation, just a query to the RDataFrame object.
1879  ///
1880  /// ### Example usage:
1881  /// ~~~{.cpp}
1882  /// auto filtNames = d.GetFilterNames();
1883  /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
1884  /// ~~~
1885  ///
1886  std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
1887 
1888  /// \brief Returns the names of the defined columns
1889  /// \return the container of the defined column names.
1890  ///
1891  /// This is not an action nor a transformation, just a simple utility to
1892  /// get the columns names that have been defined up to the node.
1893  /// If no custom column has been defined, e.g. on a root node, it returns an
1894  /// empty collection.
1895  ///
1896  /// ### Example usage:
1897  /// ~~~{.cpp}
1898  /// auto defColNames = d.GetDefinedColumnNames();
1899  /// // Print defined columns' names
1900  /// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
1901  /// ~~~
1902  ///
1904  {
1905  ColumnNames_t definedColumns;
1906 
1907  auto columns = fCustomColumns.GetColumns();
1908 
1909  for (auto column : columns) {
1910  if (!RDFInternal::IsInternalColumn(column.first) && !column.second->IsDataSourceColumn())
1911  definedColumns.emplace_back(column.first);
1912  }
1913 
1914  return definedColumns;
1915  }
1916 
1917  /// \brief Checks if a column is present in the dataset
1918  /// \return true if the column is available, false otherwise
1919  ///
1920  /// This method checks if a column is part of the input ROOT dataset, has
1921  /// been defined or can be provided by the data source.
1922  ///
1923  /// Example usage:
1924  /// ~~~{.cpp}
1925  /// ROOT::RDataFrame base(1);
1926  /// auto rdf = base.Define("definedColumn", [](){return 0;});
1927  /// rdf.HasColumn("definedColumn"); // true: we defined it
1928  /// rdf.HasColumn("rdfentry_"); // true: it's always there
1929  /// rdf.HasColumn("foo"); // false: it is not there
1930  /// ~~~
1931  bool HasColumn(std::string_view columnName)
1932  {
1933  if (fCustomColumns.HasName(columnName))
1934  return true;
1935 
1936  if (auto tree = fLoopManager->GetTree()) {
1937  const auto &branchNames = fLoopManager->GetBranchNames();
1938  const auto branchNamesEnd = branchNames.end();
1939  if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
1940  return true;
1941  }
1942 
1943  if (fDataSource && fDataSource->HasColumn(columnName))
1944  return true;
1945 
1946  return false;
1947  }
1948 
1949  /// \brief Gets the number of data processing slots
1950  /// \return The number of data processing slots used by this RDataFrame instance
1951  ///
1952  /// This method returns the number of data processing slots used by this RDataFrame
1953  /// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
1954  ///
1955  /// Example usage:
1956  /// ~~~{.cpp}
1957  /// ROOT::EnableImplicitMT(6)
1958  /// ROOT::RDataFrame df(1);
1959  /// std::cout << df.GetNSlots() << std::endl; // prints "6"
1960  /// ~~~
1961  unsigned int GetNSlots() const { return fLoopManager->GetNSlots(); }
1962 
1963  /// \brief Gets the number of event loops run
1964  /// \return The number of event loops run by this RDataFrame instance
1965  ///
1966  /// This method returns the number of events loops run so far by this RDataFrame instance.
1967  ///
1968  /// Example usage:
1969  /// ~~~{.cpp}
1970  /// ROOT::RDataFrame df(1);
1971  /// std::cout << df.GetNRuns() << std::endl; // prints "0"
1972  /// df.Sum("rdfentry_").GetValue(); // trigger the event loop
1973  /// std::cout << df.GetNRuns() << std::endl; // prints "1"
1974  /// df.Sum("rdfentry_").GetValue(); // trigger another event loop
1975  /// std::cout << df.GetNRuns() << std::endl; // prints "2"
1976  /// ~~~
1977  unsigned int GetNRuns() const { return fLoopManager->GetNRuns(); }
1978 
1979  // clang-format off
1980  ////////////////////////////////////////////////////////////////////////////
1981  /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
1982  /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
1983  /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
1984  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1985  /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
1986  /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
1987  /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
1988  /// \param[in] aggIdentity The aggregator variable of each thread is initialised to this value (or is default-constructed if the parameter is omitted)
1989  /// \return the result of the aggregation wrapped in a `RResultPtr`.
1990  ///
1991  /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
1992  /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
1993  /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
1994  /// the value of the column columnName.
1995  /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
1996  /// Otherwise the signature of aggregator must be `void(U&,T)`.
1997  ///
1998  /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
1999  /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2000  /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2001  ///
2002  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2003  ///
2004  /// Example usage:
2005  /// ~~~{.cpp}
2006  /// auto aggregator = [](double acc, double x) { return acc * x; };
2007  /// ROOT::EnableImplicitMT();
2008  /// // If multithread is enabled, the aggregator function will be called by more threads
2009  /// // and will produce a vector of partial accumulators.
2010  /// // The merger function performs the final aggregation of these partial results.
2011  /// auto merger = [](std::vector<double> &accumulators) {
2012  /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2013  /// accumulators[0] *= accumulators[i];
2014  /// }
2015  /// };
2016  ///
2017  /// // The accumulator is initialized at this value by every thread.
2018  /// double initValue = 1.;
2019  ///
2020  /// // Multiplies all elements of the column "x"
2021  /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2022  /// ~~~
2023  // clang-format on
2024  template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2025  typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2026  typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2029  RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2030  {
2031  RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2032  const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2033  constexpr auto nColumns = ArgTypes::list_size;
2034 
2035  const auto validColumnNames = GetValidatedColumnNames(1, columns);
2036 
2037  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ArgTypes());
2038 
2039  auto accObjPtr = std::make_shared<U>(aggIdentity);
2040  using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2041  using Action_t = typename RDFInternal::RAction<Helper_t, Proxied>;
2042  auto action = std::make_unique<Action_t>(
2043  Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2044  fProxiedPtr, std::move(newColumns));
2045  fLoopManager->Book(action.get());
2046  return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2047  }
2048 
2049  // clang-format off
2050  ////////////////////////////////////////////////////////////////////////////
2051  /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
2052  /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2053  /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2054  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2055  /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2056  /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2057  /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2058  /// \return the result of the aggregation wrapped in a `RResultPtr`.
2059  ///
2060  /// See previous Aggregate overload for more information.
2061  // clang-format on
2062  template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2063  typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2066  RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2067  {
2068  static_assert(
2069  std::is_default_constructible<U>::value,
2070  "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2071  return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2072  }
2073 
2074  // clang-format off
2075  ////////////////////////////////////////////////////////////////////////////
2076  /// \brief Book execution of a custom action using a user-defined helper object.
2077  /// \tparam ColumnTypes List of types of columns used by this action.
2078  /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2079  /// \param[in] helper The Action Helper to be scheduled.
2080  /// \param[in] columns The names of the columns on which the helper acts.
2081  /// \return the result of the helper wrapped in a `RResultPtr`.
2082  ///
2083  /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2084  /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2085  /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2086  ///
2087  /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2088  /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2089  /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2090  /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2091  /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2092  /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2093  /// the requested columns for the particular entry being processed.
2094  /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2095  /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2096  /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2097  /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations. Can be no-op.
2098  /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2099  /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2100  /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2101  /// method concurrently, but will always pass different 'slot' numbers.
2102  /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2103  /// Result_t). The RResultPtr returned by Book will point to this object.
2104  ///
2105  /// See ActionHelpers.hxx for the helpers used by standard RDF actions.
2106  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2107  // clang-format on
2108  template <typename... ColumnTypes, typename Helper>
2109  RResultPtr<typename Helper::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {})
2110  {
2111  constexpr auto nColumns = sizeof...(ColumnTypes);
2112  RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columns.size());
2113 
2114  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2115 
2116  // TODO add more static sanity checks on Helper
2117  using AH = RDFDetail::RActionImpl<Helper>;
2118  static_assert(std::is_base_of<AH, Helper>::value && std::is_convertible<Helper *, AH *>::value,
2119  "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2120 
2121  using Action_t = typename RDFInternal::RAction<Helper, Proxied, TTraits::TypeList<ColumnTypes...>>;
2122  auto resPtr = helper.GetResultPtr();
2123 
2124  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2125  RDFInternal::TypeList<ColumnTypes...>());
2126 
2127  auto action = std::make_unique<Action_t>(Helper(std::forward<Helper>(helper)), validColumnNames, fProxiedPtr,
2128  RDFInternal::RBookedCustomColumns(newColumns));
2129  fLoopManager->Book(action.get());
2130  return MakeResultPtr(resPtr, *fLoopManager, std::move(action));
2131  }
2132 
2133  ////////////////////////////////////////////////////////////////////////////
2134  /// \brief Provides a representation of the columns in the dataset
2135  /// \tparam ColumnTypes variadic list of branch/column types.
2136  /// \param[in] columnList Names of the columns to be displayed.
2137  /// \param[in] rows Number of events for each column to be displayed.
2138  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2139  ///
2140  /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2141  /// form. RDisplay will either print on the standard output a summarized version through `Print()` or will return a
2142  /// complete version through `AsString()`.
2143  ///
2144  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2145  ///
2146  /// Example usage:
2147  /// ~~~{.cpp}
2148  /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2149  /// auto d1 = rdf.Display("");
2150  /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2151  /// auto d2 = d.Display({"x", "y"}, 128);
2152  /// // Printing the short representations, the event loop will run
2153  /// d1->Print();
2154  /// d2->Print();
2155  /// ~~~
2156  template <typename... ColumnTypes>
2157  RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2158  {
2159  CheckIMTDisabled("Display");
2160 
2161  auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2162  return CreateAction<RDFInternal::ActionTags::Display, ColumnTypes...>(columnList, displayer);
2163  }
2164 
2165  ////////////////////////////////////////////////////////////////////////////
2166  /// \brief Provides a representation of the columns in the dataset
2167  /// \param[in] columnList Names of the columns to be displayed.
2168  /// \param[in] rows Number of events for each column to be displayed.
2169  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2170  ///
2171  /// This overload automatically infers the column types.
2172  /// See the previous overloads for further details.
2173  RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2174  {
2175  CheckIMTDisabled("Display");
2176  auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2177  return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(columnList, displayer,
2178  columnList.size());
2179  }
2180 
2181  ////////////////////////////////////////////////////////////////////////////
2182  /// \brief Provides a representation of the columns in the dataset
2183  /// \param[in] columnNameRegexp A regular expression to select the columns.
2184  /// \param[in] nRows Number of events for each column to be displayed.
2185  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2186  ///
2187  /// The existing columns are matched against the regular expression. If the string provided
2188  /// is empty, all columns are selected.
2189  /// See the previous overloads for further details.
2190  RResultPtr<RDisplay> Display(std::string_view columnNameRegexp = "", const int &nRows = 5)
2191  {
2193  columnNameRegexp, "Display");
2194  return Display(selectedColumns, nRows);
2195  }
2196 
2197  ////////////////////////////////////////////////////////////////////////////
2198  /// \brief Provides a representation of the columns in the dataset
2199  /// \param[in] columnList Names of the columns to be displayed.
2200  /// \param[in] nRows Number of events for each column to be displayed.
2201  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2202  ///
2203  /// See the previous overloads for further details.
2204  RResultPtr<RDisplay> Display(std::initializer_list<std::string> columnList, const int &nRows = 5)
2205  {
2206  ColumnNames_t selectedColumns(columnList);
2207  return Display(selectedColumns, nRows);
2208  }
2209 
2210 private:
2212  {
2214 
2215  // Entry number column
2216  const std::string entryColName = "rdfentry_";
2217  const std::string entryColType = "ULong64_t";
2218  auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
2219  using NewColEntry_t =
2221 
2222  auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
2223  ColumnNames_t{}, fLoopManager->GetNSlots(), newCols);
2224  newCols.AddName(entryColName);
2225  newCols.AddColumn(entryColumn, entryColName);
2226 
2227  // Slot number column
2228  const std::string slotColName = "rdfslot_";
2229  const std::string slotColType = "unsigned int";
2230  auto slotColGen = [](unsigned int slot) { return slot; };
2232 
2233  auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), ColumnNames_t{},
2234  fLoopManager->GetNSlots(), newCols);
2235  newCols.AddName(slotColName);
2236  newCols.AddColumn(slotColumn, slotColName);
2237 
2238  fCustomColumns = std::move(newCols);
2239 
2240  fLoopManager->AddColumnAlias("tdfentry_", entryColName);
2241  fCustomColumns.AddName("tdfentry_");
2242  fLoopManager->AddColumnAlias("tdfslot_", slotColName);
2243  fCustomColumns.AddName("tdfslot_");
2244  }
2245 
2246  std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
2247  {
2248  std::vector<std::string> types;
2249 
2250  for (auto column : columnList) {
2251  types.push_back(GetColumnType(column));
2252  }
2253  return types;
2254  }
2255 
2256  void CheckIMTDisabled(std::string_view callerName)
2257  {
2258  if (ROOT::IsImplicitMTEnabled()) {
2259  std::string error(callerName);
2260  error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
2261  throw std::runtime_error(error);
2262  }
2263  }
2264 
2265  // Type was specified by the user, no need to infer it
2266  template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2267  typename std::enable_if<!RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2268  RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r)
2269  {
2270  constexpr auto nColumns = sizeof...(BranchTypes);
2271 
2272  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2273 
2274  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2275  RDFInternal::TypeList<BranchTypes...>());
2276 
2277  const auto nSlots = fLoopManager->GetNSlots();
2278 
2279  auto action = RDFInternal::BuildAction<BranchTypes...>(validColumnNames, r, nSlots, fProxiedPtr, ActionTag{},
2280  std::move(newColumns));
2281  fLoopManager->Book(action.get());
2282  return MakeResultPtr(r, *fLoopManager, std::move(action));
2283  }
2284 
2285  // User did not specify type, do type inference
2286  // This version of CreateAction has a `nColumns` optional argument. If present, the number of required columns for
2287  // this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(BranchTypes)
2288  template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2289  typename std::enable_if<RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2291  CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r, const int nColumns = -1)
2292  {
2293  auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(BranchTypes));
2294 
2295  const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
2296  const unsigned int nSlots = fLoopManager->GetNSlots();
2297 
2298  auto tree = fLoopManager->GetTree();
2299  auto rOnHeap = RDFInternal::MakeWeakOnHeap(r);
2300 
2301  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
2302  using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
2303  RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
2304 
2305  const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(*fLoopManager);
2306  auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
2307 
2308  auto toJit = RDFInternal::JitBuildAction(validColumnNames, upcastNodeOnHeap,
2309  typeid(std::weak_ptr<ActionResultType>), typeid(ActionTag), rOnHeap,
2310  tree, nSlots, fCustomColumns, fDataSource, jittedActionOnHeap);
2311  fLoopManager->Book(jittedAction.get());
2312  fLoopManager->ToJitExec(toJit);
2313  return MakeResultPtr(r, *fLoopManager, jittedAction);
2314  }
2315 
2316  template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
2317  typename std::enable_if<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>::type
2318  DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
2319  {
2323 
2324  using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
2325  using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
2326  std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::Slot>::value, ArgTypes_t>::type;
2327  using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
2328  std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::SlotAndEntry>::value, ColTypesTmp_t>::type;
2329 
2330  constexpr auto nColumns = ColTypes_t::list_size;
2331 
2332  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2333 
2334  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
2335 
2336  // Declare return type to the interpreter, for future use by jitted actions
2337  auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2338  if (retTypeName.empty()) {
2339  // The type is not known to the interpreter.
2340  // We must not error out here, but only if/when this column is used in jitted code
2341  const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2342  retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
2343  }
2344 
2346  RDFInternal::RBookedCustomColumns newCols(newColumns);
2347  auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
2348  fLoopManager->GetNSlots(), newCols);
2349 
2350  newCols.AddName(name);
2351  newCols.AddColumn(newColumn, name);
2352 
2353  RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
2354 
2355  return newInterface;
2356  }
2357 
2358  // This overload is chosen when the callable passed to Define or DefineSlot returns void.
2359  // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
2360  // this way compilation of `Define` has no way to continue after throwing the error.
2361  template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
2362  bool IsFStringConv = std::is_convertible<F, std::string>::value,
2363  bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
2364  typename std::enable_if<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>::type
2365  DefineImpl(std::string_view, F, const ColumnNames_t &)
2366  {
2367  static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
2368  "Error in `Define`: type returned by expression is not default-constructible");
2369  return *this; // never reached
2370  }
2371 
2372  ////////////////////////////////////////////////////////////////////////////
2373  /// \brief Implementation of snapshot
2374  /// \param[in] treename The name of the TTree
2375  /// \param[in] filename The name of the TFile
2376  /// \param[in] columnList The list of names of the branches to be written
2377  /// The implementation exploits Foreach. The association of the addresses to
2378  /// the branches takes place at the first event. This is possible because
2379  /// since there are no copies, the address of the value passed by reference
2380  /// is the address pointing to the storage of the read/created object in/by
2381  /// the TTreeReaderValue/TemporaryBranch
2382  template <typename... ColumnTypes>
2383  RResultPtr<RInterface<RLoopManager>> SnapshotImpl(std::string_view treename, std::string_view filename,
2384  const ColumnNames_t &columnList, const RSnapshotOptions &options)
2385  {
2386  RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnList.size());
2387 
2388  const auto validCols = GetValidatedColumnNames(columnList.size(), columnList);
2389 
2390  auto newColumns = CheckAndFillDSColumns(validCols, std::index_sequence_for<ColumnTypes...>(),
2391  TTraits::TypeList<ColumnTypes...>());
2392 
2393  const std::string fullTreename(treename);
2394  // split name into directory and treename if needed
2395  const auto lastSlash = treename.rfind('/');
2396  std::string_view dirname = "";
2397  if (std::string_view::npos != lastSlash) {
2398  dirname = treename.substr(0, lastSlash);
2399  treename = treename.substr(lastSlash + 1, treename.size());
2400  }
2401 
2402  // add action node to functional graph and run event loop
2403  std::unique_ptr<RDFInternal::RActionBase> actionPtr;
2404  if (!ROOT::IsImplicitMTEnabled()) {
2405  // single-thread snapshot
2406  using Helper_t = RDFInternal::SnapshotHelper<ColumnTypes...>;
2407  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2408  actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, validCols, columnList, options), validCols,
2409  fProxiedPtr, std::move(newColumns)));
2410  } else {
2411  // multi-thread snapshot
2412  using Helper_t = RDFInternal::SnapshotHelperMT<ColumnTypes...>;
2413  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2414  actionPtr.reset(new Action_t(
2415  Helper_t(fLoopManager->GetNSlots(), filename, dirname, treename, validCols, columnList, options), validCols,
2416  fProxiedPtr, std::move(newColumns)));
2417  }
2418 
2419  fLoopManager->Book(actionPtr.get());
2420 
2421  return RDFInternal::CreateSnapshotRDF(validCols, fullTreename, filename, options.fLazy, *fLoopManager,
2422  std::move(actionPtr));
2423  }
2424 
2425  ////////////////////////////////////////////////////////////////////////////
2426  /// \brief Implementation of cache
2427  template <typename... BranchTypes, std::size_t... S>
2428  RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...> s)
2429  {
2430  // Check at compile time that the columns types are copy constructible
2431  constexpr bool areCopyConstructible =
2432  RDFInternal::TEvalAnd<std::is_copy_constructible<BranchTypes>::value...>::value;
2433  static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
2434 
2435  // We share bits and pieces with snapshot. De facto this is a snapshot
2436  // in memory!
2437  RDFInternal::CheckTypesAndPars(sizeof...(BranchTypes), columnList.size());
2438 
2439  auto colHolders = std::make_tuple(Take<BranchTypes>(columnList[S])...);
2440  auto ds = std::make_unique<RLazyDS<BranchTypes...>>(std::make_pair(columnList[S], std::get<S>(colHolders))...);
2441 
2442  RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnList));
2443 
2444  (void)s; // Prevents unused warning
2445 
2446  return cachedRDF;
2447  }
2448 
2449 protected:
2450  RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
2451  const RDFInternal::RBookedCustomColumns &columns, RDataSource *ds)
2452  : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fCustomColumns(columns)
2453  {
2454  }
2455 
2457 
2458  const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
2459 
2460  /// Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,
2461  /// which is expensive in terms of runtime, is called at most once.
2462  ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
2463  {
2465  fDataSource);
2466  }
2467 
2468  template <typename... ColumnTypes, std::size_t... S>
2470  CheckAndFillDSColumns(ColumnNames_t validCols, std::index_sequence<S...>, TTraits::TypeList<ColumnTypes...>)
2471  {
2472  return fDataSource ? RDFInternal::AddDSColumns(validCols, fCustomColumns, *fDataSource, fLoopManager->GetNSlots(),
2473  std::index_sequence_for<ColumnTypes...>(),
2474  TTraits::TypeList<ColumnTypes...>())
2475  : fCustomColumns;
2476  }
2477 };
2478 
2479 } // end NS RDF
2480 
2481 } // namespace ROOT
2482 
2483 #endif // ROOT_RDF_INTERFACE
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Creates a custom column.
Definition: RInterface.hxx:370
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
Definition: TypeTraits.hxx:169
unsigned int GetNRuns() const
Gets the number of event loops run.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end)
Definition: RInterface.hxx:691
Smart pointer for the return type of actions.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column.
Definition: RInterface.hxx:294
void AddName(std::string_view name)
Internally it recreates the map with the new column name, and swaps with the old one.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action)
The head node of a RDF computation graph.
virtual bool HasColumn(std::string_view) const =0
Checks if the dataset has a certain column.
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const int nColumns=-1)
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
Definition: RInterface.hxx:653
A struct which stores the parameters of a TProfile2D.
RResultPtr< T > Fill(T &&model, const ColumnNames_t &bl)
Return an object of type T on which T::Fill will be called once per event (lazy action) ...
Returns the available number of logical cores.
Definition: StringConv.hxx:21
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action)
void AddColumn(const std::shared_ptr< RDFDetail::RCustomColumnBase > &column, std::string_view name)
Internally it recreates the map with the new column, and swaps with the old one.
bool IsInternalColumn(std::string_view colName)
A struct which stores the parameters of a TH3D.
Definition: HistoModels.hxx:70
typename TakeFirstParameter< T >::type TakeFirstParameter_t
Definition: TypeTraits.hxx:155
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition: TROOT.cxx:539
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action)
Definition: RInterface.hxx:733
bool fLazy
Do not start the event loop when Snapshot is called.
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow to refer to a column with a different name.
Definition: RInterface.hxx:402
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1194
double T(double x)
Definition: ChebyshevPol.h:34
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
Definition: RInterface.hxx:228
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", const int &nRows=5)
Provides a representation of the columns in the dataset.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:836
RResultPtr< typename Helper::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:553
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action)
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
#define f(i)
Definition: RSha256.hxx:104
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr< T > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action) ...
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:478
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:84
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... > s)
Implementation of cache.
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
std::enable_if<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view, F, const ColumnNames_t &)
unsigned int GetNRuns() const
HeadNode_t CreateSnapshotRDF(const ColumnNames_t &validCols, std::string_view treeName, std::string_view fileName, bool isLazy, RLoopManager &loopManager, std::unique_ptr< RDFInternal::RActionBase > actionPtr)
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source...
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
Implementation of snapshot.
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:533
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
A struct which stores the parameters of a TH2D.
Definition: HistoModels.hxx:45
void Book(RDFInternal::RActionBase *actionPtr)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action)
A collection of options to steer the creation of the dataset on file.
RDFInternal::RBookedCustomColumns CheckAndFillDSColumns(ColumnNames_t validCols, std::index_sequence< S... >, TTraits::TypeList< ColumnTypes... >)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:559
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action)
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot and the current entry...
Definition: RInterface.hxx:353
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action)
Statistical variable, defined by its mean and variance (RMS).
Definition: TStatistic.h:33
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action)
unsigned int GetNSlots() const
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
void CheckIMTDisabled(std::string_view callerName)
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
Definition: RInterface.hxx:667
void ToJitExec(const std::string &) const
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
std::shared_ptr<::TH3D > GetHistogram() const
static constexpr double s
RLoopManager * fLoopManager
Definition: RInterface.hxx:103
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:522
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:813
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:598
RooArgSet S(const RooAbsArg &v1)
#define F(x, y, z)
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action) ...
Definition: RInterface.hxx:966
void BookFilterJit(const std::shared_ptr< RJittedFilter > &jittedFilter, std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const RDFInternal::RBookedCustomColumns &customCols, TTree *tree, RDataSource *ds)
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
Definition: RInterface.hxx:105
RDFInternal::RBookedCustomColumns fCustomColumns
Contains the custom columns defined up to this node.
Definition: RInterface.hxx:108
ROOT::R::TRInterface & r
Definition: Object.C:4
void CheckCustomColumn(std::string_view definedCol, TTree *treePtr, const ColumnNames_t &customCols, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &dataSourceColumns)
An action node in a RDF computation graph.
Definition: RAction.hxx:97
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
Definition: RInterface.hxx:713
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
std::shared_ptr<::TH2D > GetHistogram() const
The public interface to the RDataFrame federation of classes.
unsigned int UInt_t
Definition: RtypesCore.h:44
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RBookedCustomColumns &customColumns, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
Long64_t InterpreterCalc(const std::string &code, const std::string &context)
Definition: RDFUtils.cxx:299
std::shared_ptr<::TProfile > GetProfile() const
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:187
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, RDataSource *ds, RCustomColumnBase *customColumn, bool vector2rvec)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:198
A RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action)
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RBookedCustomColumns &columns, RDataSource *ds)
unsigned int GetNSlots() const
Gets the number of data processing slots.
std::shared_ptr<::TProfile2D > GetProfile() const
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot...
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
void AddColumnAlias(const std::string &alias, const std::string &colName)
Definition: graph.py:1
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:460
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action) ...
Definition: RInterface.hxx:929
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
#define h(i)
Definition: RSha256.hxx:106
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
Definition: RInterface.hxx:993
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:27
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees, CSV's and other data formats.
Definition: RDataFrame.hxx:42
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RResultPtr<::TGraph > Graph(std::string_view v1Name="", std::string_view v2Name="")
Fill and return a graph (lazy action)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot.
Definition: RInterface.hxx:323
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &validCustomColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action)
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
A struct which stores the parameters of a TProfile.
Definition: HistoModels.hxx:99
int type
Definition: TGX11.cxx:120
unsigned long long ULong64_t
Definition: RtypesCore.h:72
Print a TSeq at the prompt:
Definition: TDatime.h:115
std::enable_if< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action)
Definition: RInterface.hxx:854
ROOT type_traits extensions.
Definition: TypeTraits.hxx:21
const RCustomColumnBasePtrMap_t & GetColumns() const
Returns the list of the pointers to the defined columns.
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action)
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action)
RLoopManager * GetLoopManager() const
std::string JitBuildAction(const ColumnNames_t &bl, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &art, const std::type_info &at, void *rOnHeap, TTree *tree, const unsigned int nSlots, const RDFInternal::RBookedCustomColumns &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
Extract types from the signature of a callable object. See CallableTraits.
Definition: TypeTraits.hxx:36
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot...
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames, which is expensive in terms of runtime, is called at most once.
typedef void((*Func_t)())
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:248
RDataSource * GetDataSource() const
std::string PrettyPrintAddr(const void *const addr)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
friend class RInterface
Definition: RInterface.hxx:99
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
RInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a RInterface<RLoopManager>
Definition: RInterface.hxx:126
const Int_t kError
Definition: TError.h:39
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action)
std::shared_ptr< RJittedCustomColumn > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RDFInternal::RBookedCustomColumns &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r)
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default) ...
Definition: RInterface.hxx:887
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
Definition: tree.py:1
Encapsulates the columns defined by the user.
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action)
std::shared_ptr<::TH1D > GetHistogram() const
const std::map< std::string, std::string > & GetAliasMap() const
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
Definition: RInterface.hxx:101
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
Definition: RInterface.hxx:212
ColumnNames_t GetNames() const
Returns the list of the names of the defined columns.
char name[80]
Definition: TGX11.cxx:109
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:610
ColumnNames_t ConvertRegexToColumns(const RDFInternal::RBookedCustomColumns &customColumns, TTree *tree, ROOT::RDF::RDataSource *dataSource, std::string_view columnNameRegexp, std::string_view callerName)
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action) ...
Definition: RInterface.hxx:763