Logo ROOT   6.12/07
Reference Guide
TDFInterface.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_TDF_TINTERFACE
12 #define ROOT_TDF_TINTERFACE
13 
14 #include "ROOT/TResultProxy.hxx"
15 #include "ROOT/TDataSource.hxx"
16 #include "ROOT/TDFNodes.hxx"
18 #include "ROOT/TDFHistoModels.hxx"
19 #include "ROOT/TDFUtils.hxx"
20 #include "RtypesCore.h" // for ULong64_t
21 #include "TChain.h"
22 #include "TH1.h" // For Histo actions
23 #include "TH2.h" // For Histo actions
24 #include "TH3.h" // For Histo actions
25 #include "TInterpreter.h"
26 #include "TProfile.h" // For Histo actions
27 #include "TProfile2D.h" // For Histo actions
28 #include "TRegexp.h"
29 #include "TROOT.h" // IsImplicitMTEnabled
30 #include "TTreeReader.h"
31 
32 #include <initializer_list>
33 #include <memory>
34 #include <string>
35 #include <sstream>
36 #include <typeinfo>
37 #include <type_traits> // is_same, enable_if
38 
39 namespace ROOT {
40 
41 namespace Experimental {
42 namespace TDF {
43 template <typename T>
44 class TInterface;
45 
46 } // end NS TDF
47 } // end NS Experimental
48 
49 namespace Internal {
50 namespace TDF {
51 using namespace ROOT::Experimental::TDF;
52 using namespace ROOT::Detail::TDF;
53 
54 // CACHE --------------
55 
56 template <typename T>
58 public:
59  using value_type = T; // A shortcut to the value_type of the vector
60  std::vector<value_type> fContent;
61  // This method returns a pointer since we treat these columns as if they come
62  // from a data source, i.e. we forward an entry point to valid memory rather
63  // than a value.
64  value_type *operator()(unsigned int /*slot*/, ULong64_t iEvent) { return &fContent[iEvent]; };
65 };
66 
67 // ENDCACHE------------
68 
69 /****** BuildAndBook overloads *******/
70 // BuildAndBook builds a TAction with the right operation and books it with the TLoopManager
71 
72 // Generic filling (covers Histo2D, Histo3D, Profile1D and Profile2D actions, with and without weights)
73 template <typename... BranchTypes, typename ActionType, typename ActionResultType, typename PrevNodeType>
74 TActionBase *BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &h,
75  const unsigned int nSlots, TLoopManager &loopManager, PrevNodeType &prevNode, ActionType *)
76 {
77  using Helper_t = FillTOHelper<ActionResultType>;
78  using Action_t = TAction<Helper_t, PrevNodeType, TTraits::TypeList<BranchTypes...>>;
79  auto action = std::make_shared<Action_t>(Helper_t(h, nSlots), bl, prevNode);
80  loopManager.Book(action);
81  return action.get();
82 }
83 
84 // Histo1D filling (must handle the special case of distinguishing FillTOHelper and FillHelper
85 template <typename... BranchTypes, typename PrevNodeType>
86 TActionBase *BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<::TH1D> &h, const unsigned int nSlots,
87  TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Histo1D *)
88 {
89  auto hasAxisLimits = HistoUtils<::TH1D>::HasAxisLimits(*h);
90 
91  TActionBase *actionBase;
92  if (hasAxisLimits) {
93  using Helper_t = FillTOHelper<::TH1D>;
94  using Action_t = TAction<Helper_t, PrevNodeType, TTraits::TypeList<BranchTypes...>>;
95  auto action = std::make_shared<Action_t>(Helper_t(h, nSlots), bl, prevNode);
96  loopManager.Book(action);
97  actionBase = action.get();
98  } else {
99  using Helper_t = FillHelper;
100  using Action_t = TAction<Helper_t, PrevNodeType, TTraits::TypeList<BranchTypes...>>;
101  auto action = std::make_shared<Action_t>(Helper_t(h, nSlots), bl, prevNode);
102  loopManager.Book(action);
103  actionBase = action.get();
104  }
105 
106  return actionBase;
107 }
108 
109 // Min action
110 template <typename BranchType, typename PrevNodeType, typename ActionResultType>
111 TActionBase *
112 BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &minV, const unsigned int nSlots,
113  TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Min *)
114 {
115  using Helper_t = MinHelper<ActionResultType>;
117  auto action = std::make_shared<Action_t>(Helper_t(minV, nSlots), bl, prevNode);
118  loopManager.Book(action);
119  return action.get();
120 }
121 
122 // Max action
123 template <typename BranchType, typename PrevNodeType, typename ActionResultType>
124 TActionBase *
125 BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &maxV, const unsigned int nSlots,
126  TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Max *)
127 {
128  using Helper_t = MaxHelper<ActionResultType>;
130  auto action = std::make_shared<Action_t>(Helper_t(maxV, nSlots), bl, prevNode);
131  loopManager.Book(action);
132  return action.get();
133 }
134 
135 // Sum action
136 template <typename BranchType, typename PrevNodeType, typename ActionResultType>
137 TActionBase *
138 BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &sumV, const unsigned int nSlots,
139  TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Sum *)
140 {
141  using Helper_t = SumHelper<ActionResultType>;
143  auto action = std::make_shared<Action_t>(Helper_t(sumV, nSlots), bl, prevNode);
144  loopManager.Book(action);
145  return action.get();
146 }
147 
148 // Mean action
149 template <typename BranchType, typename PrevNodeType>
150 TActionBase *BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<double> &meanV, const unsigned int nSlots,
151  TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Mean *)
152 {
153  using Helper_t = MeanHelper;
155  auto action = std::make_shared<Action_t>(Helper_t(meanV, nSlots), bl, prevNode);
156  loopManager.Book(action);
157  return action.get();
158 }
159 /****** end BuildAndBook ******/
160 
161 std::vector<std::string> FindUsedColumnNames(std::string_view, TObjArray *, const std::vector<std::string> &);
162 
163 using TmpBranchBasePtr_t = std::shared_ptr<TCustomColumnBase>;
164 
165 Long_t JitTransformation(void *thisPtr, std::string_view methodName, std::string_view interfaceTypeName,
167  const std::map<std::string, std::string> &aliasMap, const ColumnNames_t &branches,
168  const std::vector<std::string> &customColumns,
169  const std::map<std::string, TmpBranchBasePtr_t> &tmpBookedBranches, TTree *tree,
170  std::string_view returnTypeName, TDataSource *ds);
171 
172 std::string JitBuildAndBook(const ColumnNames_t &bl, const std::string &prevNodeTypename, void *prevNode,
173  const std::type_info &art, const std::type_info &at, const void *r, TTree *tree,
174  const unsigned int nSlots, const std::map<std::string, TmpBranchBasePtr_t> &customColumns,
175  TDataSource *ds, const std::shared_ptr<TActionBase *> *const actionPtrPtr);
176 
177 // allocate a shared_ptr on the heap, return a reference to it. the user is responsible of deleting the shared_ptr*.
178 // this function is meant to only be used by TInterface's action methods, and should be deprecated as soon as we find
179 // a better way to make jitting work: the problem it solves is that we need to pass the same shared_ptr to the Helper
180 // object of each action and to the TResultProxy returned by the action. While the former is only instantiated when
181 // the event loop is about to start, the latter has to be returned to the user as soon as the action is booked.
182 // a heap allocated shared_ptr will stay alive long enough that at jitting time its address is still valid.
183 template <typename T>
184 std::shared_ptr<T> *MakeSharedOnHeap(const std::shared_ptr<T> &shPtr)
185 {
186  return new std::shared_ptr<T>(shPtr);
187 }
188 
189 bool AtLeastOneEmptyString(const std::vector<std::string_view> strings);
190 
191 /* The following functions upcast shared ptrs to TFilter, TCustomColumn, TRange to their parent class (***Base).
192  * Shared ptrs to TLoopManager are just copied, as well as shared ptrs to ***Base classes. */
193 std::shared_ptr<TFilterBase> UpcastNode(const std::shared_ptr<TFilterBase> ptr);
194 std::shared_ptr<TCustomColumnBase> UpcastNode(const std::shared_ptr<TCustomColumnBase> ptr);
195 std::shared_ptr<TRangeBase> UpcastNode(const std::shared_ptr<TRangeBase> ptr);
196 std::shared_ptr<TLoopManager> UpcastNode(const std::shared_ptr<TLoopManager> ptr);
197 
198 ColumnNames_t GetValidatedColumnNames(TLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns,
199  const ColumnNames_t &validCustomColumns, TDataSource *ds);
200 
201 std::vector<bool> FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedDSCols);
202 
203 /// Helper function to be used by `DefineDataSourceColumns`
204 template <typename T>
206 {
207  auto readers = ds.GetColumnReaders<T>(name);
208  auto getValue = [readers](unsigned int slot) { return *readers[slot]; };
210  lm.Book(std::make_shared<NewCol_t>(name, std::move(getValue), ColumnNames_t{}, &lm, /*isDSColumn=*/true));
211  lm.AddDataSourceColumn(name);
212 }
213 
214 /// Take a list of data-source column names and define the ones that haven't been defined yet.
215 template <typename... ColumnTypes, int... S>
216 void DefineDataSourceColumns(const std::vector<std::string> &columns, TLoopManager &lm, StaticSeq<S...>,
217  TTraits::TypeList<ColumnTypes...>, TDataSource &ds)
218 {
219  const auto mustBeDefined = FindUndefinedDSColumns(columns, lm.GetCustomColumnNames());
220  if (std::none_of(mustBeDefined.begin(), mustBeDefined.end(), [](bool b) { return b; })) {
221  // no need to define any column
222  return;
223  } else {
224  // hack to expand a template parameter pack without c++17 fold expressions.
225  std::initializer_list<int> expander{
226  (mustBeDefined[S] ? DefineDSColumnHelper<ColumnTypes>(columns[S], lm, ds) : /*no-op*/ ((void)0), 0)...};
227  (void)expander; // avoid unused variable warnings
228  }
229 }
230 
231 /// Convenience function invoked by jitted code to build action nodes at runtime
232 template <typename ActionType, typename... BranchTypes, typename PrevNodeType, typename ActionResultType>
233 void CallBuildAndBook(PrevNodeType &prevNode, const ColumnNames_t &bl, const unsigned int nSlots,
234  const std::shared_ptr<ActionResultType> *rOnHeap,
235  const std::shared_ptr<TActionBase *> *actionPtrPtrOnHeap)
236 {
237  // if we are here it means we are jitting, if we are jitting the loop manager must be alive
238  auto &loopManager = *prevNode.GetImplPtr();
239  using ColTypes_t = TypeList<BranchTypes...>;
240  constexpr auto nColumns = ColTypes_t::list_size;
241  auto ds = loopManager.GetDataSource();
242  if (ds)
243  DefineDataSourceColumns(bl, loopManager, GenStaticSeq_t<nColumns>(), ColTypes_t(), *ds);
244  TActionBase *actionPtr =
245  BuildAndBook<BranchTypes...>(bl, *rOnHeap, nSlots, loopManager, prevNode, (ActionType *)nullptr);
246  **actionPtrPtrOnHeap = actionPtr;
247  delete rOnHeap;
248  delete actionPtrPtrOnHeap;
249 }
250 
251 /// The contained `type` alias is `double` if `T == TInferType`, `U` if `T == std::container<U>`, `T` otherwise.
252 template <typename T, bool Container = TTraits::IsContainer<T>::value>
254  using type = T;
255 };
256 
257 template <>
258 struct TMinReturnType<TDFDetail::TInferType, false> {
259  using type = double;
260 };
261 
262 template <typename T>
263 struct TMinReturnType<T, true> {
265 };
266 
267 } // namespace TDF
268 } // namespace Internal
269 
270 namespace Detail {
271 namespace TDF {
272 
273 /// The aliased type is `double` if `T == TInferType`, `U` if `T == container<U>`, `T` otherwise.
274 template <typename T>
276 
277 template <typename T>
279 
280 template <typename T>
282 
283 template <typename T, typename COLL = std::vector<T>>
285  // We cannot put in the output collection C arrays: the ownership is not defined.
286  // We therefore proceed to check if T is an TArrayBranch
287  // If yes, we check what type is the output collection and we rebuild it.
288  // E.g. if a vector<V> was the selected collection, where V is TArrayBranch<T>,
289  // the collection becomes vector<vector<T>>.
290  static constexpr auto isAB = TDFInternal::IsTArrayBranch_t<T>::value;
291  using RealT_t = typename TDFInternal::ValueType<T>::value_type;
292  using VTColl_t = std::vector<RealT_t>;
293 
294  using NewC0_t =
295  typename std::conditional<isAB && TDFInternal::IsVector_t<COLL>::value, std::vector<VTColl_t>, COLL>::type;
296  using NewC1_t =
297  typename std::conditional<isAB && TDFInternal::IsList_t<NewC0_t>::value, std::list<VTColl_t>, NewC0_t>::type;
298  using NewC2_t =
299  typename std::conditional<isAB && TDFInternal::IsDeque_t<NewC1_t>::value, std::deque<VTColl_t>, NewC1_t>::type;
301 };
302 template <typename T, typename C>
304 } // namespace TDF
305 } // namespace Detail
306 
307 namespace Experimental {
308 
309 // forward declarations
310 class TDataFrame;
311 
312 } // namespace Experimental
313 } // namespace ROOT
314 
315 namespace cling {
316 std::string printValue(ROOT::Experimental::TDataFrame *tdf); // For a nice printing at the prompt
317 }
318 
319 namespace ROOT {
320 namespace Experimental {
321 namespace TDF {
322 namespace TDFDetail = ROOT::Detail::TDF;
323 namespace TDFInternal = ROOT::Internal::TDF;
324 namespace TTraits = ROOT::TypeTraits;
325 
326 /**
327 * \class ROOT::Experimental::TDF::TInterface
328 * \ingroup dataframe
329 * \brief The public interface to the TDataFrame federation of classes
330 * \tparam T One of the "node" base types (e.g. TLoopManager, TFilterBase). The user never specifies this type manually.
331 */
332 template <typename Proxied>
333 class TInterface {
334  using ColumnNames_t = TDFDetail::ColumnNames_t;
339  friend std::string cling::printValue(::ROOT::Experimental::TDataFrame *tdf); // For a nice printing at the prompt
340  template <typename T>
341  friend class TInterface;
342 
343  const std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this TInterface.
344  const std::weak_ptr<TLoopManager> fImplWeakPtr; ///< Weak pointer to the TLoopManager at the root of the graph.
345  ColumnNames_t fValidCustomColumns; ///< Names of columns `Define`d for this branch of the functional graph.
346  /// Non-owning pointer to a data-source object. Null if no data-source. TLoopManager has ownership of the object.
347  TDataSource *const fDataSource = nullptr;
348 
349 public:
350  /// \cond HIDDEN_SYMBOLS
351  // Template conversion operator, meant to use to convert TInterfaces of certain node types to TInterfaces of base
352  // classes of those node types, e.g. TInterface<TFilter<F,P>> -> TInterface<TFilterBase>.
353  // It is used implicitly when a call to Filter or Define is jitted: the jitted call must convert the
354  // TInterface returned by the jitted transformations to a TInterface<***Base> before returning.
355  // Must be public because it is cling that uses it.
356  template <typename NewProxied>
357  operator TInterface<NewProxied>()
358  {
359  static_assert(std::is_base_of<NewProxied, Proxied>::value,
360  "TInterface<T> can only be converted to TInterface<BaseOfT>");
361  return TInterface<NewProxied>(fProxiedPtr, fImplWeakPtr, fValidCustomColumns, fDataSource);
362  }
364  /// \endcond
365 
366  ////////////////////////////////////////////////////////////////////////////
367  /// \brief Copy-assignment operator for TInterface.
368  TInterface &operator=(const TInterface &) = default;
369 
370  ////////////////////////////////////////////////////////////////////////////
371  /// \brief Copy-ctor for TInterface.
372  TInterface(const TInterface &) = default;
373 
374  ////////////////////////////////////////////////////////////////////////////
375  /// \brief Move-ctor for TInterface.
376  TInterface(TInterface &&) = default;
377 
378  ////////////////////////////////////////////////////////////////////////////
379  /// \brief Append a filter to the call graph.
380  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
381  /// signalling whether the event has passed the selection (true) or not (false).
382  /// \param[in] columns Names of the columns/branches in input to the filter function.
383  /// \param[in] name Optional name of this filter. See `Report`.
384  ///
385  /// Append a filter node at the point of the call graph corresponding to the
386  /// object this method is called on.
387  /// The callable `f` should not have side-effects (e.g. modification of an
388  /// external or static variable) to ensure correct results when implicit
389  /// multi-threading is active.
390  ///
391  /// TDataFrame only evaluates filters when necessary: if multiple filters
392  /// are chained one after another, they are executed in order and the first
393  /// one returning false causes the event to be discarded.
394  /// Even if multiple actions or transformations depend on the same filter,
395  /// it is executed once per entry. If its result is requested more than
396  /// once, the cached result is served.
397  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
399  {
400  TDFInternal::CheckFilter(f);
401  auto loopManager = GetDataFrameChecked();
402  using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
403  constexpr auto nColumns = ColTypes_t::list_size;
404  const auto validColumnNames =
405  TDFInternal::GetValidatedColumnNames(*loopManager, nColumns, columns, fValidCustomColumns, fDataSource);
406  if (fDataSource)
407  TDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, TDFInternal::GenStaticSeq_t<nColumns>(),
408  ColTypes_t(), *fDataSource);
409  using F_t = TDFDetail::TFilter<F, Proxied>;
410  auto FilterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, *fProxiedPtr, name);
411  loopManager->Book(FilterPtr);
412  return TInterface<F_t>(FilterPtr, fImplWeakPtr, fValidCustomColumns, fDataSource);
413  }
414 
415  ////////////////////////////////////////////////////////////////////////////
416  /// \brief Append a filter to the call graph.
417  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
418  /// signalling whether the event has passed the selection (true) or not (false).
419  /// \param[in] name Optional name of this filter. See `Report`.
420  ///
421  /// Refer to the first overload of this method for the full documentation.
422  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
424  {
425  // The sfinae is there in order to pick up the overloaded method which accepts two strings
426  // rather than this template method.
427  return Filter(f, {}, name);
428  }
429 
430  ////////////////////////////////////////////////////////////////////////////
431  /// \brief Append a filter to the call graph.
432  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
433  /// signalling whether the event has passed the selection (true) or not (false).
434  /// \param[in] columns Names of the columns/branches in input to the filter function.
435  ///
436  /// Refer to the first overload of this method for the full documentation.
437  template <typename F>
438  TInterface<TDFDetail::TFilter<F, Proxied>> Filter(F f, const std::initializer_list<std::string> &columns)
439  {
440  return Filter(f, ColumnNames_t{columns});
441  }
442 
443  ////////////////////////////////////////////////////////////////////////////
444  /// \brief Append a filter to the call graph.
445  /// \param[in] expression The filter expression in C++
446  /// \param[in] name Optional name of this filter. See `Report`.
447  ///
448  /// The expression is just-in-time compiled and used to filter entries. It must
449  /// be valid C++ syntax in which variable names are substituted with the names
450  /// of branches/columns.
451  ///
452  /// Refer to the first overload of this method for the full documentation.
454  {
455  auto retVal = CallJitTransformation("Filter", name, expression, "ROOT::Detail::TDF::TFilterBase");
456  return *(TInterface<TFilterBase> *)retVal;
457  }
458 
459  // clang-format off
460  ////////////////////////////////////////////////////////////////////////////
461  /// \brief Creates a custom column
462  /// \param[in] name The name of the custom column.
463  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
464  /// \param[in] columns Names of the columns/branches in input to the producer function.
465  ///
466  /// Create a custom column that will be visible from all subsequent nodes
467  /// of the functional chain. The `expression` is only evaluated for entries that pass
468  /// all the preceding filters.
469  /// A new variable is created called `name`, accessible as if it was contained
470  /// in the dataset from subsequent transformations/actions.
471  ///
472  /// Use cases include:
473  ///
474  /// * caching the results of complex calculations for easy and efficient multiple access
475  /// * extraction of quantities of interest from complex objects
476  /// * column aliasing, i.e. changing the name of a branch/column
477  ///
478  /// An exception is thrown if the name of the new column is already in use.
479  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
480  TInterface<Proxied> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
481  {
482  return DefineImpl<F, TDFDetail::TCCHelperTypes::TNothing>(name, std::move(expression), columns);
483  }
484  // clang-format on
485 
486  // clang-format off
487  ////////////////////////////////////////////////////////////////////////////
488  /// \brief Creates a custom column with a value dependent on the processing slot.
489  /// \param[in] name The name of the custom column.
490  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
491  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
492  ///
493  /// This alternative implementation of `Define` is meant as a helper in writing thread-safe custom columns.
494  /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
495  /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
496  /// representing a "slot number". TDataFrame guarantees that different threads will invoke the expression with
497  /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1.
498  ///
499  /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
500  /// ~~~{.cpp}
501  /// int function(unsigned int, double, double);
502  /// Define("x", function, {"tdfslot_", "column1", "column2"})
503  /// DefineSlot("x", function, {"column1", "column2"})
504  /// ~~~
505  ///
506  /// See Define for more information.
507  template <typename F>
508  TInterface<Proxied> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
509  {
510  return DefineImpl<F, TDFDetail::TCCHelperTypes::TSlot>(name, std::move(expression), columns);
511  }
512  // clang-format on
513 
514  // clang-format off
515  ////////////////////////////////////////////////////////////////////////////
516  /// \brief Creates a custom column with a value dependent on the processing slot and the current entry.
517  /// \param[in] name The name of the custom column.
518  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
519  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
520  ///
521  /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
522  /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
523  /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
524  /// integer representing a "slot number". TDataFrame guarantees that different threads will invoke the expression with
525  /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1. The second parameter
526  /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
527  ///
528  /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
529  /// ~~~{.cpp}
530  /// int function(unsigned int, ULong64_t, double, double);
531  /// Define("x", function, {"tdfslot_", "tdfentry_", "column1", "column2"})
532  /// DefineSlotEntry("x", function, {"column1", "column2"})
533  /// ~~~
534  ///
535  /// See Define for more information.
536  template <typename F>
537  TInterface<Proxied> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
538  {
539  return DefineImpl<F, TDFDetail::TCCHelperTypes::TSlotAndEntry>(name, std::move(expression), columns);
540  }
541  // clang-format on
542 
543  ////////////////////////////////////////////////////////////////////////////
544  /// \brief Creates a custom column
545  /// \param[in] name The name of the custom column.
546  /// \param[in] expression An expression in C++ which represents the temporary value
547  ///
548  /// The expression is just-in-time compiled and used to produce the column entries. The
549  /// It must be valid C++ syntax in which variable names are substituted with the names
550  /// of branches/columns.
551  ///
552  /// Refer to the first overload of this method for the full documentation.
553  TInterfaceJittedDefine Define(std::string_view name, std::string_view expression)
554  {
555  auto loopManager = GetDataFrameChecked();
556  // this check must be done before jitting lest we throw exceptions in jitted code
557  TDFInternal::CheckCustomColumn(name, loopManager->GetTree(), loopManager->GetCustomColumnNames(),
558  fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
559  auto retVal = CallJitTransformation("Define", name, expression, TInterfaceJittedDefine::GetNodeTypeName());
560  auto retInterface = reinterpret_cast<TInterfaceJittedDefine *>(retVal);
561  return *retInterface;
562  }
563 
564  ////////////////////////////////////////////////////////////////////////////
565  /// \brief Allow to refer to a column with a different name
566  /// \param[in] alias name of the column alias
567  /// \param[in] columnName of the column to be aliased
568  /// Aliasing an alias is supported.
570  {
571  // The symmetry with Define is clear. We want to:
572  // - Create globally the alias and return this very node, unchanged
573  // - Make aliases accessible based on chains and not globally
574  auto loopManager = GetDataFrameChecked();
575 
576  // Helper to find out if a name is a column
577  auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
578 
579  // If the alias name is a column name, there is a problem
580  TDFInternal::CheckCustomColumn(alias, loopManager->GetTree(), fValidCustomColumns, dsColumnNames);
581 
582  const auto validColumnName = TDFInternal::GetValidatedColumnNames(*loopManager, 1, {std::string(columnName)},
583  fValidCustomColumns, fDataSource)[0];
584 
585  loopManager->AddColumnAlias(std::string(alias), validColumnName);
586  TInterface<Proxied> newInterface(fProxiedPtr, fImplWeakPtr, fValidCustomColumns, fDataSource);
587  newInterface.fValidCustomColumns.emplace_back(alias);
588  return newInterface;
589  }
590 
591  ////////////////////////////////////////////////////////////////////////////
592  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
593  /// \tparam BranchTypes variadic list of branch/column types
594  /// \param[in] treename The name of the output TTree
595  /// \param[in] filename The name of the output TFile
596  /// \param[in] columnList The list of names of the columns/branches to be written
597  /// \param[in] options TSnapshotOptions struct with extra options to pass to TFile and TTree
598  ///
599  /// This function returns a `TDataFrame` built with the output tree as a source.
600  template <typename... BranchTypes>
602  Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
603  const TSnapshotOptions &options = TSnapshotOptions())
604  {
605  return SnapshotImpl<BranchTypes...>(treename, filename, columnList, options);
606  }
607 
608  ////////////////////////////////////////////////////////////////////////////
609  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
610  /// \param[in] treename The name of the output TTree
611  /// \param[in] filename The name of the output TFile
612  /// \param[in] columnList The list of names of the columns/branches to be written
613  /// \param[in] options TSnapshotOptions struct with extra options to pass to TFile and TTree
614  ///
615  /// This function returns a `TDataFrame` built with the output tree as a source.
616  /// The types of the columns are automatically inferred and do not need to be specified.
618  const ColumnNames_t &columnList,
619  const TSnapshotOptions &options = TSnapshotOptions())
620  {
621  // Early return: if the list of columns is empty, just return an empty TDF
622  // If we proceed, the jitted call will not compile!
623  if (columnList.empty()) {
624  auto nEntries = *this->Count();
625  TInterface<TLoopManager> emptyTDF(std::make_shared<TLoopManager>(nEntries));
626  return emptyTDF;
627  }
628  auto df = GetDataFrameChecked();
629  auto tree = df->GetTree();
630  std::stringstream snapCall;
631  auto upcastNode = TDFInternal::UpcastNode(fProxiedPtr);
632  TInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, fImplWeakPtr,
633  fValidCustomColumns, fDataSource);
634  // build a string equivalent to
635  // "(TInterface<nodetype*>*)(this)->Snapshot<Ts...>(treename,filename,*(ColumnNames_t*)(&columnList), options)"
636  snapCall << "reinterpret_cast<ROOT::Experimental::TDF::TInterface<" << upcastInterface.GetNodeTypeName() << ">*>("
637  << &upcastInterface << ")->Snapshot<";
638  bool first = true;
639  for (auto &b : columnList) {
640  if (!first)
641  snapCall << ", ";
642  snapCall << TDFInternal::ColumnName2ColumnTypeName(b, tree, df->GetBookedBranch(b), fDataSource);
643  first = false;
644  };
645  snapCall << ">(\"" << treename << "\", \"" << filename << "\", "
646  << "*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
647  << &columnList << "),"
648  << "*reinterpret_cast<ROOT::Experimental::TDF::TSnapshotOptions*>(" << &options << "));";
649  // jit snapCall, return result
650  TInterpreter::EErrorCode errorCode;
651  auto newTDFPtr = gInterpreter->Calc(snapCall.str().c_str(), &errorCode);
652  if (TInterpreter::EErrorCode::kNoError != errorCode) {
653  std::string msg = "Cannot jit Snapshot call. Interpreter error code is " + std::to_string(errorCode) + ".";
654  throw std::runtime_error(msg);
655  }
656  return *reinterpret_cast<TInterface<TLoopManager> *>(newTDFPtr);
657  }
658 
659  // clang-format off
660  ////////////////////////////////////////////////////////////////////////////
661  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
662  /// \param[in] treename The name of the output TTree
663  /// \param[in] filename The name of the output TFile
664  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. See the documentation of TRegexp for more details. An empty string signals the selection of all columns.
665  /// \param[in] options TSnapshotOptions struct with extra options to pass to TFile and TTree
666  ///
667  /// This function returns a `TDataFrame` built with the output tree as a source.
668  /// The types of the columns are automatically inferred and do not need to be specified.
670  std::string_view columnNameRegexp = "",
671  const TSnapshotOptions &options = TSnapshotOptions())
672  {
673  auto selectedColumns = ConvertRegexToColumns(columnNameRegexp, "Snapshot");
674  return Snapshot(treename, filename, selectedColumns, options);
675  }
676  // clang-format on
677 
678  ////////////////////////////////////////////////////////////////////////////
679  /// \brief Save selected columns in memory
680  /// \param[in] columns to be cached in memory
681  ///
682  /// The content of the selected columns is saved in memory exploiting the functionality offered by
683  /// the Take action. No extra copy is carried out when serving cached data to the actions and
684  /// transformations requesting it.
685  template <typename... BranchTypes>
687  {
688  auto staticSeq = TDFInternal::GenStaticSeq_t<sizeof...(BranchTypes)>();
689  return CacheImpl<BranchTypes...>(columnList, staticSeq);
690  }
691 
692  ////////////////////////////////////////////////////////////////////////////
693  /// \brief Save selected columns in memory
694  /// \param[in] columns to be cached in memory
695  ///
696  /// The content of the selected columns is saved in memory exploiting the functionality offered by
697  /// the Take action. No extra copy is carried out when serving cached data to the actions and
698  /// transformations requesting it.
700  {
701  // Early return: if the list of columns is empty, just return an empty TDF
702  // If we proceed, the jitted call will not compile!
703  if (columnList.empty()) {
704  auto nEntries = *this->Count();
705  TInterface<TLoopManager> emptyTDF(std::make_shared<TLoopManager>(nEntries));
706  return emptyTDF;
707  }
708 
709  auto df = GetDataFrameChecked();
710  auto tree = df->GetTree();
711  std::stringstream snapCall;
712  auto upcastNode = TDFInternal::UpcastNode(fProxiedPtr);
713  TInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, fImplWeakPtr,
714  fValidCustomColumns, fDataSource);
715  // build a string equivalent to
716  // "(TInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
717  snapCall << "reinterpret_cast<ROOT::Experimental::TDF::TInterface<" << upcastInterface.GetNodeTypeName() << ">*>("
718  << &upcastInterface << ")->Cache<";
719  bool first = true;
720  for (auto &b : columnList) {
721  if (!first)
722  snapCall << ", ";
723  snapCall << TDFInternal::ColumnName2ColumnTypeName(b, tree, df->GetBookedBranch(b), fDataSource);
724  first = false;
725  };
726  snapCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
727  << &columnList << "));";
728  // jit snapCall, return result
729  TInterpreter::EErrorCode errorCode;
730  auto newTDFPtr = gInterpreter->Calc(snapCall.str().c_str(), &errorCode);
731  if (TInterpreter::EErrorCode::kNoError != errorCode) {
732  std::string msg = "Cannot jit Cache call. Interpreter error code is " + std::to_string(errorCode) + ".";
733  throw std::runtime_error(msg);
734  }
735  return *reinterpret_cast<TInterface<TLoopManager> *>(newTDFPtr);
736  }
737 
738  ////////////////////////////////////////////////////////////////////////////
739  /// \brief Save selected columns in memory
740  /// \param[in] a regular expression to select the columns
741  ///
742  /// The existing columns are matched against the regeular expression. If the string provided
743  /// is empty, all columns are selected.
745  {
746  auto selectedColumns = ConvertRegexToColumns(columnNameRegexp, "Cache");
747  return Cache(selectedColumns);
748  }
749 
750  ////////////////////////////////////////////////////////////////////////////
751  /// \brief Creates a node that filters entries based on range
752  /// \param[in] start How many entries to discard before resuming processing.
753  /// \param[in] stop Total number of entries that will be processed before stopping. 0 means "never stop".
754  /// \param[in] stride Process one entry every `stride` entries. Must be strictly greater than 0.
755  ///
756  /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
757  TInterface<TDFDetail::TRange<Proxied>> Range(unsigned int start, unsigned int stop, unsigned int stride = 1)
758  {
759  // check invariants
760  if (stride == 0 || (stop != 0 && stop < start))
761  throw std::runtime_error("Range: stride must be strictly greater than 0 and stop must be greater than start.");
763  throw std::runtime_error("Range was called with ImplicitMT enabled. Multi-thread ranges are not supported.");
764 
765  auto df = GetDataFrameChecked();
767  auto RangePtr = std::make_shared<Range_t>(start, stop, stride, *fProxiedPtr);
768  df->Book(RangePtr);
769  TInterface<TDFDetail::TRange<Proxied>> tdf_r(RangePtr, fImplWeakPtr, fValidCustomColumns, fDataSource);
770  return tdf_r;
771  }
772 
773  ////////////////////////////////////////////////////////////////////////////
774  /// \brief Creates a node that filters entries based on range
775  /// \param[in] stop Total number of entries that will be processed before stopping. 0 means "never stop".
776  ///
777  /// See the other Range overload for a detailed description.
778  TInterface<TDFDetail::TRange<Proxied>> Range(unsigned int stop) { return Range(0, stop, 1); }
779 
780  ////////////////////////////////////////////////////////////////////////////
781  /// \brief Execute a user-defined function on each entry (*instant action*)
782  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined
783  /// calculations.
784  /// \param[in] columns Names of the columns/branches in input to the user function.
785  ///
786  /// The callable `f` is invoked once per entry. This is an *instant action*:
787  /// upon invocation, an event loop as well as execution of all scheduled actions
788  /// is triggered.
789  /// Users are responsible for the thread-safety of this callable when executing
790  /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
791  template <typename F>
792  void Foreach(F f, const ColumnNames_t &columns = {})
793  {
795  using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
796  ForeachSlot(TDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
797  }
798 
799  ////////////////////////////////////////////////////////////////////////////
800  /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*)
801  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined
802  /// calculations.
803  /// \param[in] columns Names of the columns/branches in input to the user function.
804  ///
805  /// Same as `Foreach`, but the user-defined function takes an extra
806  /// `unsigned int` as its first parameter, the *processing slot index*.
807  /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
808  /// for each thread of execution.
809  /// This is meant as a helper in writing thread-safe `Foreach`
810  /// actions when using `TDataFrame` after `ROOT::EnableImplicitMT()`.
811  /// The user-defined processing callable is able to follow different
812  /// *streams of processing* indexed by the first parameter.
813  /// `ForeachSlot` works just as well with single-thread execution: in that
814  /// case `slot` will always be `0`.
815  template <typename F>
816  void ForeachSlot(F f, const ColumnNames_t &columns = {})
817  {
818  auto loopManager = GetDataFrameChecked();
820  constexpr auto nColumns = ColTypes_t::list_size;
821  const auto validColumnNames =
822  TDFInternal::GetValidatedColumnNames(*loopManager, nColumns, columns, fValidCustomColumns, fDataSource);
823  if (fDataSource)
824  TDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, TDFInternal::GenStaticSeq_t<nColumns>(),
825  ColTypes_t(), *fDataSource);
826  using Helper_t = TDFInternal::ForeachSlotHelper<F>;
828  loopManager->Book(std::make_shared<Action_t>(Helper_t(std::move(f)), validColumnNames, *fProxiedPtr));
829  loopManager->Run();
830  }
831 
832  ////////////////////////////////////////////////////////////////////////////
833  /// \brief Execute a user-defined reduce operation on the values of a column.
834  /// \tparam F The type of the reduce callable. Automatically deduced.
835  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
836  /// \param[in] f A callable with signature `T(T,T)`
837  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
838  ///
839  /// A reduction takes two values of a column and merges them into one (e.g.
840  /// by summing them, taking the maximum, etc). This action performs the
841  /// specified reduction operation on all processed column values, returning
842  /// a single value of the same type. The callable f must satisfy the general
843  /// requirements of a *processing function* besides having signature `T(T,T)`
844  /// where `T` is the type of column columnName.
845  ///
846  /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
847  /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
848  /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
849  /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
850  /// overload.
851  ///
852  /// This action is *lazy*: upon invocation of this method the calculation is
853  /// booked but not executed. See TResultProxy documentation.
854  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
856  {
857  static_assert(
858  std::is_default_constructible<T>::value,
859  "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
860  return Reduce(std::move(f), columnName, T());
861  }
862 
863  ////////////////////////////////////////////////////////////////////////////
864  /// \brief Execute a user-defined reduce operation on the values of a column.
865  /// \tparam F The type of the reduce callable. Automatically deduced.
866  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
867  /// \param[in] f A callable with signature `T(T,T)`
868  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
869  /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
870  ///
871  /// See the description of the first Reduce overload for more information.
872  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
873  TResultProxy<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
874  {
875  using arg_types = typename TTraits::CallableTraits<F>::arg_types;
876  TDFInternal::CheckReduce(f, arg_types());
877  auto loopManager = GetDataFrameChecked();
878  const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
879  constexpr auto nColumns = arg_types::list_size;
880  const auto validColumnNames =
881  TDFInternal::GetValidatedColumnNames(*loopManager, 1, columns, fValidCustomColumns, fDataSource);
882  if (fDataSource)
883  TDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, TDFInternal::GenStaticSeq_t<nColumns>(),
884  arg_types(), *fDataSource);
885  auto redObjPtr = std::make_shared<T>(redIdentity);
886  using Helper_t = TDFInternal::ReduceHelper<F, T>;
887  using Action_t = typename TDFInternal::TAction<Helper_t, Proxied>;
888  auto action = std::make_shared<Action_t>(Helper_t(std::move(f), redObjPtr, fProxiedPtr->GetNSlots()),
889  validColumnNames, *fProxiedPtr);
890  loopManager->Book(action);
891  return MakeResultProxy(redObjPtr, loopManager, action.get());
892  }
893 
894  ////////////////////////////////////////////////////////////////////////////
895  /// \brief Return the number of entries processed (*lazy action*)
896  ///
897  /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
898  /// This action is *lazy*: upon invocation of this method the calculation is
899  /// booked but not executed. See TResultProxy documentation.
901  {
902  auto df = GetDataFrameChecked();
903  const auto nSlots = fProxiedPtr->GetNSlots();
904  auto cSPtr = std::make_shared<ULong64_t>(0);
905  using Helper_t = TDFInternal::CountHelper;
907  auto action = std::make_shared<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), *fProxiedPtr);
908  df->Book(action);
909  return MakeResultProxy(cSPtr, df, action.get());
910  }
911 
912  ////////////////////////////////////////////////////////////////////////////
913  /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default)
914  /// \tparam T The type of the column.
915  /// \tparam COLL The type of collection used to store the values.
916  /// \param[in] column The name of the column to collect the values of.
917  ///
918  /// If the type of the column is `TArrayBranch<T>`, i.e. in the ROOT dataset this is
919  /// a C-style array, the type stored in the return container is a `std::vector<T>` to
920  /// guarantee the lifetime of the data involved.
921  /// This action is *lazy*: upon invocation of this method the calculation is
922  /// booked but not executed. See TResultProxy documentation.
923  template <typename T, typename COLL = std::vector<T>>
925  {
926  auto loopManager = GetDataFrameChecked();
927  const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
928  const auto validColumnNames =
929  TDFInternal::GetValidatedColumnNames(*loopManager, 1, columns, fValidCustomColumns, fDataSource);
930  if (fDataSource)
931  TDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, TDFInternal::GenStaticSeq_t<1>(),
932  TTraits::TypeList<T>(), *fDataSource);
933 
934  using RealT_t = typename TDFDetail::TTakeRealTypes<T, COLL>::RealT_t;
935  using RealColl_t = typename TDFDetail::TTakeRealTypes<T, COLL>::RealColl_t;
936 
937  using Helper_t = TDFInternal::TakeHelper<RealT_t, T, RealColl_t>;
939  auto valuesPtr = std::make_shared<RealColl_t>();
940  const auto nSlots = fProxiedPtr->GetNSlots();
941  auto action = std::make_shared<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, *fProxiedPtr);
942  loopManager->Book(action);
943  return MakeResultProxy(valuesPtr, loopManager, action.get());
944  }
945 
946  ////////////////////////////////////////////////////////////////////////////
947  /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
948  /// \tparam V The type of the column used to fill the histogram.
949  /// \param[in] model The returned histogram will be constructed using this as a model.
950  /// \param[in] vName The name of the column that will fill the histogram.
951  ///
952  /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
953  /// is filled with each one of the elements of the container. In case multiple columns of container type
954  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
955  /// possibly different lengths between events).
956  /// This action is *lazy*: upon invocation of this method the calculation is
957  /// booked but not executed. See TResultProxy documentation.
958  /// The user gives up ownership of the model histogram.
959  template <typename V = TDFDetail::TInferType>
960  TResultProxy<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
961  {
962  const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
963  std::shared_ptr<::TH1D> h(nullptr);
964  {
965  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
966  h = model.GetHistogram();
967  }
968 
969  if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
970  TDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
971  return CreateAction<TDFInternal::ActionTypes::Histo1D, V>(userColumns, h);
972  }
973 
974  template <typename V = TDFDetail::TInferType>
976  {
977  return Histo1D<V>({"", "", 128u, 0., 0.}, vName);
978  }
979 
980  ////////////////////////////////////////////////////////////////////////////
981  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
982  /// \tparam V The type of the column used to fill the histogram.
983  /// \tparam W The type of the column used as weights.
984  /// \param[in] model The returned histogram will be constructed using this as a model.
985  /// \param[in] vName The name of the column that will fill the histogram.
986  /// \param[in] wName The name of the column that will provide the weights.
987  ///
988  /// See the description of the first Histo1D overload for more details.
989  template <typename V = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
991  {
992  auto columnViews = {vName, wName};
993  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
994  ? ColumnNames_t()
995  : ColumnNames_t(columnViews.begin(), columnViews.end());
996  std::shared_ptr<::TH1D> h(nullptr);
997  {
998  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
999  h = model.GetHistogram();
1000  }
1001  return CreateAction<TDFInternal::ActionTypes::Histo1D, V, W>(userColumns, h);
1002  }
1003 
1004  ////////////////////////////////////////////////////////////////////////////
1005  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1006  /// \tparam V The type of the column used to fill the histogram.
1007  /// \tparam W The type of the column used as weights.
1008  /// \param[in] vName The name of the column that will fill the histogram.
1009  /// \param[in] wName The name of the column that will provide the weights.
1010  ///
1011  /// This overload uses a default model histogram TH1D("", "", 128u, 0., 0.).
1012  /// See the description of the first Histo1D overload for more details.
1013  template <typename V = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
1015  {
1016  return Histo1D<V, W>({"", "", 128u, 0., 0.}, vName, wName);
1017  }
1018 
1019  ////////////////////////////////////////////////////////////////////////////
1020  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1021  /// \tparam V The type of the column used to fill the histogram.
1022  /// \tparam W The type of the column used as weights.
1023  /// \param[in] model The returned histogram will be constructed using this as a model.
1024  ///
1025  /// This overload will use the first two default columns as column names.
1026  /// See the description of the first Histo1D overload for more details.
1027  template <typename V, typename W>
1028  TResultProxy<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1029  {
1030  return Histo1D<V, W>(model, "", "");
1031  }
1032 
1033  ////////////////////////////////////////////////////////////////////////////
1034  /// \brief Fill and return a two-dimensional histogram (*lazy action*)
1035  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1036  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1037  /// \param[in] model The returned histogram will be constructed using this as a model.
1038  /// \param[in] v1Name The name of the column that will fill the x axis.
1039  /// \param[in] v2Name The name of the column that will fill the y axis.
1040  ///
1041  /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1042  /// is filled with each one of the elements of the container. In case multiple columns of container type
1043  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1044  /// possibly different lengths between events).
1045  /// This action is *lazy*: upon invocation of this method the calculation is
1046  /// booked but not executed. See TResultProxy documentation.
1047  /// The user gives up ownership of the model histogram.
1048  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType>
1050  {
1051  std::shared_ptr<::TH2D> h(nullptr);
1052  {
1053  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1054  h = model.GetHistogram();
1055  }
1056  if (!TDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1057  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1058  }
1059  auto columnViews = {v1Name, v2Name};
1060  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1061  ? ColumnNames_t()
1062  : ColumnNames_t(columnViews.begin(), columnViews.end());
1063  return CreateAction<TDFInternal::ActionTypes::Histo2D, V1, V2>(userColumns, h);
1064  }
1065 
1066  ////////////////////////////////////////////////////////////////////////////
1067  /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*)
1068  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1069  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1070  /// \tparam W The type of the column used for the weights of the histogram.
1071  /// \param[in] model The returned histogram will be constructed using this as a model.
1072  /// \param[in] v1Name The name of the column that will fill the x axis.
1073  /// \param[in] v2Name The name of the column that will fill the y axis.
1074  /// \param[in] wName The name of the column that will provide the weights.
1075  ///
1076  /// This action is *lazy*: upon invocation of this method the calculation is
1077  /// booked but not executed. See TResultProxy documentation.
1078  /// The user gives up ownership of the model histogram.
1079  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1080  typename W = TDFDetail::TInferType>
1083  {
1084  std::shared_ptr<::TH2D> h(nullptr);
1085  {
1086  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1087  h = model.GetHistogram();
1088  }
1089  if (!TDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1090  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1091  }
1092  auto columnViews = {v1Name, v2Name, wName};
1093  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1094  ? ColumnNames_t()
1095  : ColumnNames_t(columnViews.begin(), columnViews.end());
1096  return CreateAction<TDFInternal::ActionTypes::Histo2D, V1, V2, W>(userColumns, h);
1097  }
1098 
1099  template <typename V1, typename V2, typename W>
1101  {
1102  return Histo2D<V1, V2, W>(model, "", "", "");
1103  }
1104 
1105  ////////////////////////////////////////////////////////////////////////////
1106  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1107  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1108  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1109  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1110  /// \param[in] model The returned histogram will be constructed using this as a model.
1111  /// \param[in] v1Name The name of the column that will fill the x axis.
1112  /// \param[in] v2Name The name of the column that will fill the y axis.
1113  /// \param[in] v3Name The name of the column that will fill the z axis.
1114  ///
1115  /// This action is *lazy*: upon invocation of this method the calculation is
1116  /// booked but not executed. See TResultProxy documentation.
1117  /// The user gives up ownership of the model histogram.
1118  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1119  typename V3 = TDFDetail::TInferType>
1121  std::string_view v3Name = "")
1122  {
1123  std::shared_ptr<::TH3D> h(nullptr);
1124  {
1125  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1126  h = model.GetHistogram();
1127  }
1128  if (!TDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1129  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1130  }
1131  auto columnViews = {v1Name, v2Name, v3Name};
1132  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1133  ? ColumnNames_t()
1134  : ColumnNames_t(columnViews.begin(), columnViews.end());
1135  return CreateAction<TDFInternal::ActionTypes::Histo3D, V1, V2, V3>(userColumns, h);
1136  }
1137 
1138  ////////////////////////////////////////////////////////////////////////////
1139  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1140  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1141  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1142  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1143  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1144  /// \param[in] model The returned histogram will be constructed using this as a model.
1145  /// \param[in] v1Name The name of the column that will fill the x axis.
1146  /// \param[in] v2Name The name of the column that will fill the y axis.
1147  /// \param[in] v3Name The name of the column that will fill the z axis.
1148  /// \param[in] wName The name of the column that will provide the weights.
1149  ///
1150  /// This action is *lazy*: upon invocation of this method the calculation is
1151  /// booked but not executed. See TResultProxy documentation.
1152  /// The user gives up ownership of the model histogram.
1153  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1154  typename V3 = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
1156  std::string_view v3Name, std::string_view wName)
1157  {
1158  std::shared_ptr<::TH3D> h(nullptr);
1159  {
1160  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1161  h = model.GetHistogram();
1162  }
1163  if (!TDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1164  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1165  }
1166  auto columnViews = {v1Name, v2Name, v3Name, wName};
1167  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1168  ? ColumnNames_t()
1169  : ColumnNames_t(columnViews.begin(), columnViews.end());
1170  return CreateAction<TDFInternal::ActionTypes::Histo3D, V1, V2, V3, W>(userColumns, h);
1171  }
1172 
1173  template <typename V1, typename V2, typename V3, typename W>
1175  {
1176  return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1177  }
1178 
1179  ////////////////////////////////////////////////////////////////////////////
1180  /// \brief Fill and return a one-dimensional profile (*lazy action*)
1181  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1182  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1183  /// \param[in] model The model to be considered to build the new return value.
1184  /// \param[in] v1Name The name of the column that will fill the x axis.
1185  /// \param[in] v2Name The name of the column that will fill the y axis.
1186  ///
1187  /// This action is *lazy*: upon invocation of this method the calculation is
1188  /// booked but not executed. See TResultProxy documentation.
1189  /// The user gives up ownership of the model profile object.
1190  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType>
1193  {
1194  std::shared_ptr<::TProfile> h(nullptr);
1195  {
1196  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1197  h = model.GetProfile();
1198  }
1199 
1200  if (!TDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1201  throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1202  }
1203  auto columnViews = {v1Name, v2Name};
1204  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1205  ? ColumnNames_t()
1206  : ColumnNames_t(columnViews.begin(), columnViews.end());
1207  return CreateAction<TDFInternal::ActionTypes::Profile1D, V1, V2>(userColumns, h);
1208  }
1209 
1210  ////////////////////////////////////////////////////////////////////////////
1211  /// \brief Fill and return a one-dimensional profile (*lazy action*)
1212  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1213  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1214  /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1215  /// \param[in] model The model to be considered to build the new return value.
1216  /// \param[in] v1Name The name of the column that will fill the x axis.
1217  /// \param[in] v2Name The name of the column that will fill the y axis.
1218  /// \param[in] wName The name of the column that will provide the weights.
1219  ///
1220  /// This action is *lazy*: upon invocation of this method the calculation is
1221  /// booked but not executed. See TResultProxy documentation.
1222  /// The user gives up ownership of the model profile object.
1223  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1224  typename W = TDFDetail::TInferType>
1227  {
1228  std::shared_ptr<::TProfile> h(nullptr);
1229  {
1230  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1231  h = model.GetProfile();
1232  }
1233 
1234  if (!TDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1235  throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1236  }
1237  auto columnViews = {v1Name, v2Name, wName};
1238  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1239  ? ColumnNames_t()
1240  : ColumnNames_t(columnViews.begin(), columnViews.end());
1241  return CreateAction<TDFInternal::ActionTypes::Profile1D, V1, V2, W>(userColumns, h);
1242  }
1243 
1244  template <typename V1, typename V2, typename W>
1246  {
1247  return Profile1D<V1, V2, W>(model, "", "", "");
1248  }
1249 
1250  ////////////////////////////////////////////////////////////////////////////
1251  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1252  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1253  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1254  /// \tparam V2 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1255  /// \param[in] model The returned profile will be constructed using this as a model.
1256  /// \param[in] v1Name The name of the column that will fill the x axis.
1257  /// \param[in] v2Name The name of the column that will fill the y axis.
1258  /// \param[in] v3Name The name of the column that will fill the z axis.
1259  ///
1260  /// This action is *lazy*: upon invocation of this method the calculation is
1261  /// booked but not executed. See TResultProxy documentation.
1262  /// The user gives up ownership of the model profile.
1263  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1264  typename V3 = TDFDetail::TInferType>
1266  std::string_view v2Name = "", std::string_view v3Name = "")
1267  {
1268  std::shared_ptr<::TProfile2D> h(nullptr);
1269  {
1270  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1271  h = model.GetProfile();
1272  }
1273 
1274  if (!TDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1275  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1276  }
1277  auto columnViews = {v1Name, v2Name, v3Name};
1278  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1279  ? ColumnNames_t()
1280  : ColumnNames_t(columnViews.begin(), columnViews.end());
1281  return CreateAction<TDFInternal::ActionTypes::Profile2D, V1, V2, V3>(userColumns, h);
1282  }
1283 
1284  ////////////////////////////////////////////////////////////////////////////
1285  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1286  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1287  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1288  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1289  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1290  /// \param[in] model The returned histogram will be constructed using this as a model.
1291  /// \param[in] v1Name The name of the column that will fill the x axis.
1292  /// \param[in] v2Name The name of the column that will fill the y axis.
1293  /// \param[in] v3Name The name of the column that will fill the z axis.
1294  /// \param[in] wName The name of the column that will provide the weights.
1295  ///
1296  /// This action is *lazy*: upon invocation of this method the calculation is
1297  /// booked but not executed. See TResultProxy documentation.
1298  /// The user gives up ownership of the model profile.
1299  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1300  typename V3 = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
1302  std::string_view v3Name, std::string_view wName)
1303  {
1304  std::shared_ptr<::TProfile2D> h(nullptr);
1305  {
1306  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1307  h = model.GetProfile();
1308  }
1309 
1310  if (!TDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1311  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1312  }
1313  auto columnViews = {v1Name, v2Name, v3Name, wName};
1314  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1315  ? ColumnNames_t()
1316  : ColumnNames_t(columnViews.begin(), columnViews.end());
1317  return CreateAction<TDFInternal::ActionTypes::Profile2D, V1, V2, V3, W>(userColumns, h);
1318  }
1319 
1320  template <typename V1, typename V2, typename V3, typename W>
1322  {
1323  return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
1324  }
1325 
1326  ////////////////////////////////////////////////////////////////////////////
1327  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1328  ///
1329  /// T must be a type that provides a copy- or move-constructor and a `T::Fill` method that takes as many arguments
1330  /// as the column names pass as columnList. The arguments of `T::Fill` must have type equal to the one of the
1331  /// specified columns (these types are passed as template parameters to this method).
1332  /// \tparam FirstColumn The first type of the column the values of which are used to fill the object.
1333  /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1334  /// \tparam T The type of the object to fill. Automatically deduced.
1335  /// \param[in] model The model to be considered to build the new return value.
1336  /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1337  ///
1338  /// The user gives up ownership of the model object.
1339  /// The list of column names to be used for filling must always be specified.
1340  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1341  /// See TResultProxy documentation.
1342  template <typename FirstColumn, typename... OtherColumns, typename T> // need FirstColumn to disambiguate overloads
1343  TResultProxy<T> Fill(T &&model, const ColumnNames_t &columnList)
1344  {
1345  auto h = std::make_shared<T>(std::move(model));
1346  if (!TDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1347  throw std::runtime_error("The absence of axes limits is not supported yet.");
1348  }
1349  return CreateAction<TDFInternal::ActionTypes::Fill, FirstColumn, OtherColumns...>(columnList, h);
1350  }
1351 
1352  ////////////////////////////////////////////////////////////////////////////
1353  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1354  ///
1355  /// This overload infers the types of the columns specified in columnList at runtime and just-in-time compiles the
1356  /// method with these types. See previous overload for more information.
1357  /// \tparam T The type of the object to fill. Automatically deduced.
1358  /// \param[in] model The model to be considered to build the new return value.
1359  /// \param[in] columnList The name of the columns read to fill the object.
1360  ///
1361  /// This overload of `Fill` infers the type of the specified columns at runtime and just-in-time compiles the
1362  /// previous overload. Check the previous overload for more details on `Fill`.
1363  template <typename T>
1365  {
1366  auto h = std::make_shared<T>(std::move(model));
1367  if (!TDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1368  throw std::runtime_error("The absence of axes limits is not supported yet.");
1369  }
1370  return CreateAction<TDFInternal::ActionTypes::Fill, TDFDetail::TInferType>(bl, h, bl.size());
1371  }
1372 
1373  ////////////////////////////////////////////////////////////////////////////
1374  /// \brief Return the minimum of processed column values (*lazy action*)
1375  /// \tparam T The type of the branch/column.
1376  /// \param[in] columnName The name of the branch/column to be treated.
1377  ///
1378  /// If T is not specified, TDataFrame will infer it from the data and just-in-time compile the correct
1379  /// template specialization of this method.
1380  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1381  ///
1382  /// This action is *lazy*: upon invocation of this method the calculation is
1383  /// booked but not executed. See TResultProxy documentation.
1384  template <typename T = TDFDetail::TInferType>
1386  {
1387  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1388  using RetType_t = TDFDetail::MinReturnType_t<T>;
1389  auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
1390  return CreateAction<TDFInternal::ActionTypes::Min, T>(userColumns, minV);
1391  }
1392 
1393  ////////////////////////////////////////////////////////////////////////////
1394  /// \brief Return the maximum of processed column values (*lazy action*)
1395  /// \tparam T The type of the branch/column.
1396  /// \param[in] columnName The name of the branch/column to be treated.
1397  ///
1398  /// If T is not specified, TDataFrame will infer it from the data and just-in-time compile the correct
1399  /// template specialization of this method.
1400  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1401  ///
1402  /// This action is *lazy*: upon invocation of this method the calculation is
1403  /// booked but not executed. See TResultProxy documentation.
1404  template <typename T = TDFDetail::TInferType>
1406  {
1407  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1408  using RetType_t = TDFDetail::MaxReturnType_t<T>;
1409  auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
1410  return CreateAction<TDFInternal::ActionTypes::Max, T>(userColumns, maxV);
1411  }
1412 
1413  ////////////////////////////////////////////////////////////////////////////
1414  /// \brief Return the mean of processed column values (*lazy action*)
1415  /// \tparam T The type of the branch/column.
1416  /// \param[in] columnName The name of the branch/column to be treated.
1417  ///
1418  /// If T is not specified, TDataFrame will infer it from the data and just-in-time compile the correct
1419  /// template specialization of this method.
1420  ///
1421  /// This action is *lazy*: upon invocation of this method the calculation is
1422  /// booked but not executed. See TResultProxy documentation.
1423  template <typename T = TDFDetail::TInferType>
1425  {
1426  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1427  auto meanV = std::make_shared<double>(0);
1428  return CreateAction<TDFInternal::ActionTypes::Mean, T>(userColumns, meanV);
1429  }
1430 
1431  // clang-format off
1432  ////////////////////////////////////////////////////////////////////////////
1433  /// \brief Return the sum of processed column values (*lazy action*)
1434  /// \tparam T The type of the branch/column.
1435  /// \param[in] columnName The name of the branch/column.
1436  /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1437  ///
1438  /// If T is not specified, TDataFrame will infer it from the data and just-in-time compile the correct
1439  /// template specialization of this method.
1440  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1441  ///
1442  /// This action is *lazy*: upon invocation of this method the calculation is
1443  /// booked but not executed. See TResultProxy documentation.
1444  template <typename T = TDFDetail::TInferType>
1446  Sum(std::string_view columnName = "",
1448  {
1449  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1450  auto sumV = std::make_shared<TDFDetail::SumReturnType_t<T>>(initValue);
1451  return CreateAction<TDFInternal::ActionTypes::Sum, T>(userColumns, sumV);
1452  }
1453  // clang-format on
1454 
1455  ////////////////////////////////////////////////////////////////////////////
1456  /// \brief Print filtering statistics on screen
1457  ///
1458  /// Calling `Report` on the main `TDataFrame` object prints stats for
1459  /// all named filters in the call graph. Calling this method on a
1460  /// stored chain state (i.e. a graph node different from the first) prints
1461  /// the stats for all named filters in the chain section between the original
1462  /// `TDataFrame` and that node (included). Stats are printed in the same
1463  /// order as the named filters have been added to the graph.
1464  void Report()
1465  {
1466  // if this is a TInterface<TLoopManager> on which `Define` has been called, users
1467  // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
1468  // certainly does not contain named filters.
1469  // The number 2 takes into account the implicit columns for entry and slot number
1470  if (std::is_same<Proxied, TLoopManager>::value && fValidCustomColumns.size() > 2)
1471  return;
1472 
1473  auto df = GetDataFrameChecked();
1474  // TODO: we could do better and check if the Report was asked for Filters that have already run
1475  // and in that case skip the event-loop
1476  if (df->MustRunNamedFilters())
1477  df->Run();
1478  fProxiedPtr->Report();
1479  }
1480 
1481  /////////////////////////////////////////////////////////////////////////////
1482  /// \brief Returns the names of the available columns
1483  ///
1484  /// This is not an action nor a transformation, just a simple utility to
1485  /// get columns names out of the TDataFrame nodes.
1487  {
1488  ColumnNames_t allColumns;
1489 
1490  auto addIfNotInternal = [&allColumns](std::string_view colName) {
1491  if (!TDFInternal::IsInternalColumn(colName))
1492  allColumns.emplace_back(colName);
1493  };
1494 
1495  std::for_each(fValidCustomColumns.begin(), fValidCustomColumns.end(), addIfNotInternal);
1496 
1497  auto df = GetDataFrameChecked();
1498  auto tree = df->GetTree();
1499  if (tree) {
1500  auto branchNames = TDFInternal::GetBranchNames(*tree);
1501  allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
1502  }
1503 
1504  if (fDataSource) {
1505  auto &dsColNames = fDataSource->GetColumnNames();
1506  allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end());
1507  }
1508 
1509  return allColumns;
1510  }
1511 
1512 private:
1514  {
1515  auto lm = GetDataFrameChecked();
1516  ColumnNames_t validColNames = {};
1517 
1518  // Entry number column
1519  auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
1520  const auto entryColName = "tdfentry_";
1522  lm->Book(std::make_shared<EntryCol_t>(entryColName, std::move(entryColGen), validColNames, lm.get()));
1523  fValidCustomColumns.emplace_back(entryColName);
1524 
1525  // Slot number column
1526  auto slotColGen = [](unsigned int slot) { return slot; };
1527  const auto slotColName = "tdfslot_";
1529  lm->Book(std::make_shared<SlotCol_t>(slotColName, std::move(slotColGen), validColNames, lm.get()));
1530  fValidCustomColumns.emplace_back(slotColName);
1531  }
1532 
1534  {
1535  const auto theRegexSize = columnNameRegexp.size();
1536  std::string theRegex(columnNameRegexp);
1537 
1538  const auto isEmptyRegex = 0 == theRegexSize;
1539  // This is to avoid cases where branches called b1, b2, b3 are all matched by expression "b"
1540  if (theRegexSize > 0 && theRegex[0] != '^')
1541  theRegex = "^" + theRegex;
1542  if (theRegexSize > 0 && theRegex[theRegexSize - 1] != '$')
1543  theRegex = theRegex + "$";
1544 
1545  ColumnNames_t selectedColumns;
1546  selectedColumns.reserve(32);
1547 
1548  // Since we support gcc48 and it does not provide in its stl std::regex,
1549  // we need to use TRegexp
1550  TRegexp regexp(theRegex);
1551  int dummy;
1552  for (auto &&branchName : fValidCustomColumns) {
1553  if ((isEmptyRegex || -1 != regexp.Index(branchName.c_str(), &dummy)) &&
1555  selectedColumns.emplace_back(branchName);
1556  }
1557  }
1558 
1559  auto df = GetDataFrameChecked();
1560  auto tree = df->GetTree();
1561  if (tree) {
1562  auto branchNames = TDFInternal::GetBranchNames(*tree);
1563  for (auto &branchName : branchNames) {
1564  if (isEmptyRegex || -1 != regexp.Index(branchName, &dummy)) {
1565  selectedColumns.emplace_back(branchName);
1566  }
1567  }
1568  }
1569 
1570  if (fDataSource) {
1571  auto &dsColNames = fDataSource->GetColumnNames();
1572  for (auto &dsColName : dsColNames) {
1573  if (isEmptyRegex || -1 != regexp.Index(dsColName, &dummy)) {
1574  selectedColumns.emplace_back(dsColName);
1575  }
1576  }
1577  }
1578 
1579  if (selectedColumns.empty()) {
1580  std::string text(callerName);
1581  if (columnNameRegexp.empty()) {
1582  text = ": there is no column available to match.";
1583  } else {
1584  text = ": regex \"" + columnNameRegexp + "\" did not match any column.";
1585  }
1586  throw std::runtime_error(text);
1587  }
1588  return selectedColumns;
1589  }
1590 
1592  std::string_view returnTypeName)
1593  {
1594  auto df = GetDataFrameChecked();
1595  auto &aliasMap = df->GetAliasMap();
1596  auto tree = df->GetTree();
1598  const auto &customColumns = df->GetCustomColumnNames();
1599  auto tmpBookedBranches = df->GetBookedColumns();
1600  auto upcastNode = TDFInternal::UpcastNode(fProxiedPtr);
1602  upcastNode, fImplWeakPtr, fValidCustomColumns, fDataSource);
1603  const auto thisTypeName = "ROOT::Experimental::TDF::TInterface<" + upcastInterface.GetNodeTypeName() + ">";
1604  return TDFInternal::JitTransformation(&upcastInterface, transformation, thisTypeName, nodeName, expression,
1605  aliasMap, branches, customColumns, tmpBookedBranches, tree, returnTypeName,
1606  fDataSource);
1607  }
1608 
1609  /// Return string containing fully qualified type name of the node pointed by fProxied.
1610  /// The method is only defined for TInterface<{TFilterBase,TCustomColumnBase,TRangeBase,TLoopManager}> as it should
1611  /// only be called on "upcast" TInterfaces.
1612  inline static std::string GetNodeTypeName();
1613 
1614  // Type was specified by the user, no need to infer it
1615  template <typename ActionType, typename... BranchTypes, typename ActionResultType,
1616  typename std::enable_if<!TDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
1617  TResultProxy<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r)
1618  {
1619  auto lm = GetDataFrameChecked();
1620  constexpr auto nColumns = sizeof...(BranchTypes);
1621  const auto selectedCols =
1622  TDFInternal::GetValidatedColumnNames(*lm, nColumns, columns, fValidCustomColumns, fDataSource);
1623  if (fDataSource)
1624  TDFInternal::DefineDataSourceColumns(selectedCols, *lm, TDFInternal::GenStaticSeq_t<nColumns>(),
1625  TDFInternal::TypeList<BranchTypes...>(), *fDataSource);
1626  const auto nSlots = fProxiedPtr->GetNSlots();
1627  auto actionPtr =
1628  TDFInternal::BuildAndBook<BranchTypes...>(selectedCols, r, nSlots, *lm, *fProxiedPtr, (ActionType *)nullptr);
1629  return MakeResultProxy(r, lm, actionPtr);
1630  }
1631 
1632  // User did not specify type, do type inference
1633  // This version of CreateAction has a `nColumns` optional argument. If present, the number of required columns for
1634  // this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(BranchTypes)
1635  template <typename ActionType, typename... BranchTypes, typename ActionResultType,
1636  typename std::enable_if<TDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
1638  CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r, const int nColumns = -1)
1639  {
1640  auto lm = GetDataFrameChecked();
1641  auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(BranchTypes));
1642  const auto validColumnNames =
1643  TDFInternal::GetValidatedColumnNames(*lm, realNColumns, columns, fValidCustomColumns, fDataSource);
1644  const unsigned int nSlots = fProxiedPtr->GetNSlots();
1645  const auto &customColumns = lm->GetBookedColumns();
1646  auto tree = lm->GetTree();
1647  auto rOnHeap = TDFInternal::MakeSharedOnHeap(r);
1648  auto upcastNode = TDFInternal::UpcastNode(fProxiedPtr);
1650  upcastNode, fImplWeakPtr, fValidCustomColumns, fDataSource);
1651  auto resultProxyAndActionPtrPtr = MakeResultProxy(r, lm);
1652  auto &resultProxy = resultProxyAndActionPtrPtr.first;
1653  auto actionPtrPtrOnHeap = TDFInternal::MakeSharedOnHeap(resultProxyAndActionPtrPtr.second);
1654  auto toJit = TDFInternal::JitBuildAndBook(validColumnNames, upcastInterface.GetNodeTypeName(), upcastNode.get(),
1655  typeid(std::shared_ptr<ActionResultType>), typeid(ActionType), rOnHeap,
1656  tree, nSlots, customColumns, fDataSource, actionPtrPtrOnHeap);
1657  lm->Jit(toJit);
1658  return resultProxy;
1659  }
1660 
1661  template <typename F, typename ExtraArgs>
1662  typename std::enable_if<std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
1663  TInterface<Proxied>>::type
1664  DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
1665  {
1666  auto loopManager = GetDataFrameChecked();
1667  TDFInternal::CheckCustomColumn(name, loopManager->GetTree(), loopManager->GetCustomColumnNames(),
1668  fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
1669 
1670  using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
1671  using ColTypesTmp_t =
1672  typename TDFInternal::RemoveFirstParameterIf<std::is_same<ExtraArgs, TDFDetail::TCCHelperTypes::TSlot>::value,
1673  ArgTypes_t>::type;
1674  using ColTypes_t = typename TDFInternal::RemoveFirstTwoParametersIf<
1675  std::is_same<ExtraArgs, TDFDetail::TCCHelperTypes::TSlotAndEntry>::value, ColTypesTmp_t>::type;
1676 
1677  constexpr auto nColumns = ColTypes_t::list_size;
1678  const auto validColumnNames =
1679  TDFInternal::GetValidatedColumnNames(*loopManager, nColumns, columns, fValidCustomColumns, fDataSource);
1680  if (fDataSource)
1681  TDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, TDFInternal::GenStaticSeq_t<nColumns>(),
1682  ColTypes_t(), *fDataSource);
1683  using NewCol_t = TDFDetail::TCustomColumn<F, ExtraArgs>;
1684 
1685  // Here we check if the return type is a pointer. In this case, we assume it points to valid memory
1686  // and we treat the column as if it came from a data source, i.e. it points to valid memory.
1687  loopManager->Book(std::make_shared<NewCol_t>(name, std::move(expression), validColumnNames, loopManager.get()));
1688  TInterface<Proxied> newInterface(fProxiedPtr, fImplWeakPtr, fValidCustomColumns, fDataSource);
1689  newInterface.fValidCustomColumns.emplace_back(name);
1690  return newInterface;
1691  }
1692 
1693  // This overload is chosen when the callable passed to Define or DefineSlot returns void.
1694  // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
1695  // this way compilation of `Define` has no way to continue after throwing the error.
1696  template <
1697  typename F, typename ExtraArgs,
1698  typename std::enable_if<!std::is_convertible<F, std::string>::value &&
1699  !std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
1700  int>::type = 0>
1702  {
1703  static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
1704  "Error in `Define`: type returned by expression is not default-constructible");
1705  return *this; // never reached
1706  }
1707 
1708  ////////////////////////////////////////////////////////////////////////////
1709  /// \brief Implementation of snapshot
1710  /// \param[in] treename The name of the TTree
1711  /// \param[in] filename The name of the TFile
1712  /// \param[in] columnList The list of names of the branches to be written
1713  /// The implementation exploits Foreach. The association of the addresses to
1714  /// the branches takes place at the first event. This is possible because
1715  /// since there are no copies, the address of the value passed by reference
1716  /// is the address pointing to the storage of the read/created object in/by
1717  /// the TTreeReaderValue/TemporaryBranch
1718  template <typename... BranchTypes>
1720  const ColumnNames_t &columnList, const TSnapshotOptions &options)
1721  {
1722  TDFInternal::CheckSnapshot(sizeof...(BranchTypes), columnList.size());
1723 
1724  auto df = GetDataFrameChecked();
1725  auto validCols =
1726  TDFInternal::GetValidatedColumnNames(*df, columnList.size(), columnList, fValidCustomColumns, fDataSource);
1727 
1728  if (fDataSource)
1729  TDFInternal::DefineDataSourceColumns(validCols, *df, TDFInternal::GenStaticSeq_t<sizeof...(BranchTypes)>(),
1730  TTraits::TypeList<BranchTypes...>(), *fDataSource);
1731 
1732  const std::string fullTreename(treename);
1733  // split name into directory and treename if needed
1734  const auto lastSlash = treename.rfind('/');
1735  std::string_view dirname = "";
1736  if (std::string_view::npos != lastSlash) {
1737  dirname = treename.substr(0, lastSlash);
1738  treename = treename.substr(lastSlash + 1, treename.size());
1739  }
1740 
1741  // add action node to functional graph and run event loop
1742  std::shared_ptr<TDFInternal::TActionBase> actionPtr;
1743  if (!ROOT::IsImplicitMTEnabled()) {
1744  // single-thread snapshot
1745  using Helper_t = TDFInternal::SnapshotHelper<BranchTypes...>;
1746  using Action_t = TDFInternal::TAction<Helper_t, Proxied, TTraits::TypeList<BranchTypes...>>;
1747  actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, validCols, columnList, options), validCols,
1748  *fProxiedPtr));
1749  } else {
1750  // multi-thread snapshot
1751  using Helper_t = TDFInternal::SnapshotHelperMT<BranchTypes...>;
1752  using Action_t = TDFInternal::TAction<Helper_t, Proxied>;
1753  actionPtr.reset(new Action_t(
1754  Helper_t(fProxiedPtr->GetNSlots(), filename, dirname, treename, validCols, columnList, options), validCols,
1755  *fProxiedPtr));
1756  }
1757  df->Book(std::move(actionPtr));
1758  df->Run();
1759 
1760  // create new TDF
1762  // Now we mimic a constructor for the TDataFrame. We cannot invoke it here
1763  // since this would introduce a cyclic headers dependency.
1764  TInterface<TLoopManager> snapshotTDF(std::make_shared<TLoopManager>(nullptr, validCols));
1765  auto chain = std::make_shared<TChain>(fullTreename.c_str());
1766  chain->Add(std::string(filename).c_str());
1767  snapshotTDF.fProxiedPtr->SetTree(chain);
1768 
1769  return snapshotTDF;
1770  }
1771 
1772  ////////////////////////////////////////////////////////////////////////////
1773  /// \brief Implementation of cache
1774  template <typename... BranchTypes, int... S>
1775  TInterface<TLoopManager> CacheImpl(const ColumnNames_t &columnList, TDFInternal::StaticSeq<S...> s)
1776  {
1777 
1778  // Check at compile time that the columns types are copy constructible
1779  constexpr bool areCopyConstructible =
1780  TDFInternal::TEvalAnd<std::is_copy_constructible<BranchTypes>::value...>::value;
1781  static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
1782 
1783  // We share bits and pieces with snapshot. De facto this is a snapshot
1784  // in memory!
1785  TDFInternal::CheckSnapshot(sizeof...(BranchTypes), columnList.size());
1786  if (fDataSource) {
1787  auto lm = GetDataFrameChecked();
1788  TDFInternal::DefineDataSourceColumns(columnList, *lm, s, TTraits::TypeList<BranchTypes...>(), *fDataSource);
1789  }
1790  std::tuple<
1792  colHolders;
1793 
1794  // TODO: really fix the type of the Take....
1795  std::initializer_list<int> expander0{(
1796  // This gets expanded
1797  std::get<S>(colHolders).fContent = std::move(
1798  Take<typename std::decay<decltype(std::get<S>(colHolders))>::type::value_type>(columnList[S]).GetValue()),
1799  0)...};
1800  (void)expander0;
1801 
1802  auto nEntries = std::get<0>(colHolders).fContent.size();
1803 
1804  TInterface<TLoopManager> cachedTDF(std::make_shared<TLoopManager>(nEntries));
1805  const ColumnNames_t noCols = {};
1806 
1807  // Now we define the data columns. We add the name of the valid custom columns by hand later.
1808  auto lm = cachedTDF.GetDataFrameChecked();
1809  std::initializer_list<int> expander1{(
1810  // This gets expanded
1811  lm->Book(
1812  std::make_shared<TDFDetail::TCustomColumn<typename std::decay<decltype(std::get<S>(colHolders))>::type,
1813  TDFDetail::TCCHelperTypes::TSlotAndEntry>>(
1814  columnList[S], std::move(std::get<S>(colHolders)), noCols, lm.get(), true)),
1815  0)...};
1816  (void)expander1;
1817 
1818  // Add the defined columns
1819  auto &vc = cachedTDF.fValidCustomColumns;
1820  vc.insert(vc.end(), columnList.begin(), columnList.end());
1821  return cachedTDF;
1822  }
1823 
1824 protected:
1825  /// Get the TLoopManager if reachable. If not, throw.
1826  std::shared_ptr<TLoopManager> GetDataFrameChecked()
1827  {
1828  auto df = fImplWeakPtr.lock();
1829  if (!df) {
1830  throw std::runtime_error("The main TDataFrame is not reachable: did it go out of scope?");
1831  }
1832  return df;
1833  }
1834 
1835  TInterface(const std::shared_ptr<Proxied> &proxied, const std::weak_ptr<TLoopManager> &impl,
1836  const ColumnNames_t &validColumns, TDataSource *ds)
1837  : fProxiedPtr(proxied), fImplWeakPtr(impl), fValidCustomColumns(validColumns), fDataSource(ds)
1838  {
1839  }
1840 
1841  /// Only enabled when building a TInterface<TLoopManager>
1842  template <typename T = Proxied, typename std::enable_if<std::is_same<T, TLoopManager>::value, int>::type = 0>
1843  TInterface(const std::shared_ptr<Proxied> &proxied)
1844  : fProxiedPtr(proxied), fImplWeakPtr(proxied->GetSharedPtr()), fValidCustomColumns(),
1845  fDataSource(proxied->GetDataSource())
1846  {
1847  AddDefaultColumns();
1848  }
1849 
1850  const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
1851 };
1852 
1853 template <>
1855 {
1856  return "ROOT::Detail::TDF::TFilterBase";
1857 }
1858 
1859 template <>
1861 {
1862  return "ROOT::Detail::TDF::TLoopManager";
1863 }
1864 
1865 template <>
1867 {
1868  return "ROOT::Detail::TDF::TRangeBase";
1869 }
1870 
1871 } // end NS TDF
1872 } // end NS Experimental
1873 } // end NS ROOT
1874 
1875 #endif // ROOT_TDF_INTERFACE
ColumnNames_t ConvertRegexToColumns(std::string_view columnNameRegexp, std::string_view callerName)
TResultProxy< TDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const TDFDetail::SumReturnType_t< T > &initValue=TDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action)
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
Definition: TypeTraits.hxx:185
TInterface(const std::shared_ptr< Proxied > &proxied, const std::weak_ptr< TLoopManager > &impl, const ColumnNames_t &validColumns, TDataSource *ds)
TTraits::TakeFirstParameter_t< T > type
std::vector< bool > FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedDSCols)
Return a bitset each element of which indicates whether the corresponding element in selectedColumns ...
The contained type alias is double if T == TInferType, U if T == std::container<U>, T otherwise.
TResultProxy< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
A collection of options to steer the creation of the dataset on file.
Definition: TDFUtils.hxx:38
std::shared_ptr<::TH1D > GetHistogram() const
An array of TObjects.
Definition: TObjArray.h:37
A struct which stores the parameters of a TH2D.
std::enable_if< std::is_default_constructible< typename TTraits::CallableTraits< F >::ret_type >::value, TInterface< Proxied > >::type DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
std::vector< value_type > fContent
TInterface< TLoopManager > CacheImpl(const ColumnNames_t &columnList, TDFInternal::StaticSeq< S... > s)
Implementation of cache.
basic_string_view< char > string_view
Definition: RStringView.h:35
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
TInterface< TLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
typename TDFInternal::ValueType< T >::value_type RealT_t
const std::shared_ptr< Proxied > & GetProxiedPtr() const
std::shared_ptr<::TH3D > GetHistogram() const
TResultProxy< ULong64_t > Count()
Return the number of entries processed (lazy action)
typename TakeFirstParameter< T >::type TakeFirstParameter_t
Definition: TypeTraits.hxx:171
TResultProxy< T > MakeResultProxy(const std::shared_ptr< T > &r, const std::shared_ptr< TLoopManager > &df, TDFInternal::TActionBase *actionPtr)
Create a TResultProxy and set its pointer to the corresponding TAction This overload is invoked by no...
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1193
double T(double x)
Definition: ChebyshevPol.h:34
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
TInterface< TLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
TH1 * h
Definition: legend2.C:5
TResultProxy< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
void CheckCustomColumn(std::string_view definedCol, TTree *treePtr, const ColumnNames_t &customCols, const ColumnNames_t &dataSourceColumns)
Definition: TDFUtils.cxx:274
TResultProxy< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r)
TInterface< TLoopManager > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const TSnapshotOptions &options=TSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Regular expression class.
Definition: TRegexp.h:31
std::shared_ptr< TLoopManager > UpcastNode(const std::shared_ptr< TLoopManager > ptr)
Long_t CallJitTransformation(std::string_view transformation, std::string_view nodeName, std::string_view expression, std::string_view returnTypeName)
TActionBase * BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr< double > &meanV, const unsigned int nSlots, TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Mean *)
TResultProxy< T > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action) ...
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
#define gInterpreter
Definition: TInterpreter.h:526
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action) ...
TInterface< Proxied > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot and the current entry...
A struct which stores the parameters of a TProfile2D.
void DefineDSColumnHelper(std::string_view name, TLoopManager &lm, TDataSource &ds)
Helper function to be used by DefineDataSourceColumns
TResultProxy<::TProfile > Profile1D(const TProfile1DModel &model)
A struct which stores the parameters of a TH3D.
void Book(const ActionBasePtr_t &actionPtr)
Definition: TDFNodes.cxx:434
TInterface< Proxied > Alias(std::string_view alias, std::string_view columnName)
Allow to refer to a column with a different name.
std::vector< std::string > FindUsedColumnNames(std::string_view, TObjArray *, const std::vector< std::string > &)
void AddDataSourceColumn(std::string_view name)
Definition: TDFNodes.hxx:198
MinReturnType_t< T > MaxReturnType_t
TInterface< TLoopManager > SnapshotImpl(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const TSnapshotOptions &options)
Implementation of snapshot.
Double_t GetXmin() const
Definition: TAxis.h:133
typename TDFInternal::TMinReturnType< T >::type MinReturnType_t
The aliased type is double if T == TInferType, U if T == container<U>, T otherwise.
TInterface< TLoopManager > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const TSnapshotOptions &options=TSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
void DefineDataSourceColumns(const std::vector< std::string > &columns, TLoopManager &lm, StaticSeq< S... >, TTraits::TypeList< ColumnTypes... >, TDataSource &ds)
Take a list of data-source column names and define the ones that haven&#39;t been defined yet...
static std::string GetNodeTypeName()
Return string containing fully qualified type name of the node pointed by fProxied.
unsigned int GetNSlots() const
Definition: TDFNodes.hxx:368
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
std::string JitBuildAndBook(const ColumnNames_t &bl, const std::string &prevNodeTypename, void *prevNode, const std::type_info &art, const std::type_info &at, const void *r, TTree *tree, const unsigned int nSlots, const std::map< std::string, TmpBranchBasePtr_t > &customColumns, TDataSource *ds, const std::shared_ptr< TActionBase *> *const actionPtrPtr)
TResultProxy<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action)
TResultProxy<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
const ColumnNames_t & GetCustomColumnNames() const
Definition: TDFNodes.hxx:175
std::vector< RealT_t > VTColl_t
typename std::conditional< isAB &&TDFInternal::IsVector_t< COLL >::value, std::vector< VTColl_t >, COLL >::type NewC0_t
bool IsInternalColumn(std::string_view colName)
Definition: TDFUtils.cxx:359
ColumnNames_t GetValidatedColumnNames(TLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &validCustomColumns, TDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
TInterface< TDFDetail::TFilter< F, Proxied > > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
TResultProxy< TDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action)
typename std::conditional< isAB &&TDFInternal::IsList_t< NewC0_t >::value, std::list< VTColl_t >, NewC0_t >::type NewC1_t
TResultProxy< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action)
RooArgSet S(const RooAbsArg &v1)
Smart pointer for the return type of actions.
#define F(x, y, z)
std::shared_ptr< T > * MakeSharedOnHeap(const std::shared_ptr< T > &shPtr)
std::string printValue(const TDatime *val)
Print a TDatime at the prompt.
Definition: TDatime.cxx:514
TResultProxy< TDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action)
TResultProxy<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
TInterface< TLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
ROOT::R::TRInterface & r
Definition: Object.C:4
TDataSource defines an API that TDataFrame can use to read arbitrary data formats.
Definition: TDataSource.hxx:51
TInterface< TDFDetail::TRange< Proxied > > Range(unsigned int start, unsigned int stop, unsigned int stride=1)
Creates a node that filters entries based on range.
TResultProxy<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action) ...
TResultProxy<::TH3D > Histo3D(const TH3DModel &model)
TInterface< TDFDetail::TFilter< F, Proxied > > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
TResultProxy<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
Double_t Mean(Long64_t n, const T *a, const Double_t *w=0)
Definition: TMath.h:972
std::shared_ptr<::TProfile > GetProfile() const
TResultProxy<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action)
TInterface< Proxied > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column.
A struct which stores the parameters of a TProfile.
std::shared_ptr< TLoopManager > GetDataFrameChecked()
Get the TLoopManager if reachable. If not, throw.
TInterface< TDFDetail::TFilter< F, Proxied > > Filter(F f, std::string_view name)
Append a filter to the call graph.
TResultProxy<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action)
TResultProxy<::TProfile2D > Profile2D(const TProfile2DModel &model)
const std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this TInterface.
TResultProxy<::TH1D > Histo1D(std::string_view vName)
TResultProxy<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action)
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:27
TResultProxy< T > Fill(T &&model, const ColumnNames_t &bl)
Return an object of type T on which T::Fill will be called once per event (lazy action) ...
long Long_t
Definition: RtypesCore.h:50
TResultProxy<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action)
TInterface< TFilterBase > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Ssiz_t Index(const TString &str, Ssiz_t *len, Ssiz_t start=0) const
Find the first occurrence of the regexp in string and return the position, or -1 if there is no match...
Definition: TRegexp.cxx:209
TText * text
int type
Definition: TGX11.cxx:120
unsigned long long ULong64_t
Definition: RtypesCore.h:70
Print a TSeq at the prompt:
Definition: TDatime.h:115
void Run(unsigned int slot, Long64_t entry) final
Definition: TDFNodes.hxx:401
typename TTakeRealTypes< T, C >::RealColl_t ColType_t
void Report()
Print filtering statistics on screen.
const std::weak_ptr< TLoopManager > fImplWeakPtr
Weak pointer to the TLoopManager at the root of the graph.
static RooMathCoreReg dummy
ROOT type_traits extensions.
Definition: TypeTraits.hxx:23
static constexpr double s
Extract types from the signature of a callable object. See CallableTraits.
Definition: TypeTraits.hxx:38
TResultProxy<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action)
Binding & operator=(OUT(*fun)(void))
TResultProxy< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const int nColumns=-1)
TResultProxy<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action)
typedef void((*Func_t)())
void CheckSnapshot(unsigned int nTemplateParams, unsigned int nColumnNames)
Definition: TDFUtils.cxx:300
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, TCustomColumnBase *tmpBranch, TDataSource *ds)
Return a string containing the type of the given branch.
Definition: TDFUtils.cxx:123
std::vector< T ** > GetColumnReaders(std::string_view columnName)
Called at most once per column by TDF.
Definition: TDataSource.hxx:78
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:584
std::shared_ptr<::TProfile2D > GetProfile() const
ROOT&#39;s TDataFrame offers a high level interface for analyses of data stored in TTrees.
Definition: TDataFrame.hxx:39
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:200
A struct which stores the parameters of a TH1D.
const Int_t kError
Definition: TError.h:39
ColumnNames_t fValidCustomColumns
Names of columns Defined for this branch of the functional graph.
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
TInterfaceJittedDefine Define(std::string_view name, std::string_view expression)
Creates a custom column.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action)
Definition: tree.py:1
typename std::conditional< isAB &&TDFInternal::IsDeque_t< NewC1_t >::value, std::deque< VTColl_t >, NewC1_t >::type NewC2_t
std::shared_ptr<::TH2D > GetHistogram() const
std::shared_ptr< TCustomColumnBase > TmpBranchBasePtr_t
Definition: first.py:1
TInterface< Proxied > DefineImpl(std::string_view, F, const ColumnNames_t &={})
void CallBuildAndBook(PrevNodeType &prevNode, const ColumnNames_t &bl, const unsigned int nSlots, const std::shared_ptr< ActionResultType > *rOnHeap, const std::shared_ptr< TActionBase *> *actionPtrPtrOnHeap)
Convenience function invoked by jitted code to build action nodes at runtime.
Long_t JitTransformation(void *thisPtr, std::string_view methodName, std::string_view interfaceTypeName, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const std::vector< std::string > &customColumns, const std::map< std::string, TmpBranchBasePtr_t > &tmpBookedBranches, TTree *tree, std::string_view returnTypeName, TDataSource *ds)
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset&#39;s column names.
value_type * operator()(unsigned int, ULong64_t iEvent)
TInterface< TDFDetail::TRange< Proxied > > Range(unsigned int stop)
Creates a node that filters entries based on range.
Double_t GetXmax() const
Definition: TAxis.h:134
MinReturnType_t< T > SumReturnType_t
ColumnNames_t GetBranchNames(TTree &t)
Get all the branches names, including the ones of the friend trees.
Definition: TDFUtils.cxx:265
char name[80]
Definition: TGX11.cxx:109
The public interface to the TDataFrame federation of classes.
TInterface< TLoopManager > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const TSnapshotOptions &options=TSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
TInterface< Proxied > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot.
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:315
TResultProxy<::TH2D > Histo2D(const TH2DModel &model)
TResultProxy< typename TDFDetail::ColType_t< T, COLL > > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default) ...
TResultProxy<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action)
TInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a TInterface<TLoopManager>