Logo ROOT   6.18/05
Reference Guide
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/RDataSource.hxx"
19#include "ROOT/RDF/RRange.hxx"
20#include "ROOT/RDF/Utils.hxx"
23#include "ROOT/RResultPtr.hxx"
25#include "ROOT/RStringView.hxx"
26#include "ROOT/TypeTraits.hxx"
27#include "RtypesCore.h" // for ULong64_t
28#include "TH1.h" // For Histo actions
29#include "TH2.h" // For Histo actions
30#include "TH3.h" // For Histo actions
31#include "TProfile.h"
32#include "TProfile2D.h"
33#include "TStatistic.h"
34
35#include <algorithm>
36#include <cstddef>
37#include <initializer_list>
38#include <limits>
39#include <memory>
40#include <sstream>
41#include <stdexcept>
42#include <string>
43#include <type_traits> // is_same, enable_if
44#include <typeinfo>
45#include <vector>
46
47class TGraph;
48
49// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
50namespace ROOT {
53void EnableImplicitMT(UInt_t numthreads);
54class RDataFrame;
55namespace Internal {
56namespace RDF {
58}
59}
60}
61namespace cling {
62std::string printValue(ROOT::RDataFrame *tdf);
63}
64
65namespace ROOT {
66namespace RDF {
69namespace TTraits = ROOT::TypeTraits;
70
71template <typename Proxied, typename DataSource>
72class RInterface;
73
74using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
75
76// clang-format off
77/**
78 * \class ROOT::RDF::RInterface
79 * \ingroup dataframe
80 * \brief The public interface to the RDataFrame federation of classes
81 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
82 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
83 *
84 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
85 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
86 */
87// clang-format on
88template <typename Proxied, typename DataSource = void>
90 using DS_t = DataSource;
95 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
97
98 template <typename T, typename W>
99 friend class RInterface;
100
101 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
102 ///< The RLoopManager at the root of this computation graph. Never null.
104 /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
106
107 /// Contains the custom columns defined up to this node.
109
110public:
111 ////////////////////////////////////////////////////////////////////////////
112 /// \brief Copy-assignment operator for RInterface.
113 RInterface &operator=(const RInterface &) = default;
114
115 ////////////////////////////////////////////////////////////////////////////
116 /// \brief Copy-ctor for RInterface.
117 RInterface(const RInterface &) = default;
118
119 ////////////////////////////////////////////////////////////////////////////
120 /// \brief Move-ctor for RInterface.
121 RInterface(RInterface &&) = default;
122
123 ////////////////////////////////////////////////////////////////////////////
124 /// \brief Build a RInterface that wraps the given RLoopManager. Only enabled when building a RInterface<RLoopManager>
125 template <typename T = Proxied, typename std::enable_if<std::is_same<T, RLoopManager>::value, int>::type = 0>
126 RInterface(const std::shared_ptr<Proxied> &proxied)
127 : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource())
128 {
130 }
131
132 ////////////////////////////////////////////////////////////////////////////
133 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
134 /// Different RDataFrame methods return different C++ types. All nodes, however,
135 /// can be cast to this common type at the cost of a small performance penalty.
136 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
137 /// around via (non-template, C++11) helper functions.
138 /// Example usage:
139 /// ~~~{.cpp}
140 /// // a function that conditionally adds a Range to a RDataFrame node.
141 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
142 /// {
143 /// return mustAddRange ? df.Range(1) : df;
144 /// }
145 /// // use as :
146 /// ROOT::RDataFrame df(10);
147 /// auto maybeRanged = MaybeAddRange(df, true);
148 /// ~~~
149 /// Note that it is not a problem to pass RNodes by value.
150 operator RNode() const
151 {
152 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fCustomColumns,
154 }
155
156 ////////////////////////////////////////////////////////////////////////////
157 /// \brief Append a filter to the call graph.
158 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
159 /// signalling whether the event has passed the selection (true) or not (false).
160 /// \param[in] columns Names of the columns/branches in input to the filter function.
161 /// \param[in] name Optional name of this filter. See `Report`.
162 /// \return the filter node of the computation graph.
163 ///
164 /// Append a filter node at the point of the call graph corresponding to the
165 /// object this method is called on.
166 /// The callable `f` should not have side-effects (e.g. modification of an
167 /// external or static variable) to ensure correct results when implicit
168 /// multi-threading is active.
169 ///
170 /// RDataFrame only evaluates filters when necessary: if multiple filters
171 /// are chained one after another, they are executed in order and the first
172 /// one returning false causes the event to be discarded.
173 /// Even if multiple actions or transformations depend on the same filter,
174 /// it is executed once per entry. If its result is requested more than
175 /// once, the cached result is served.
176 ///
177 /// ### Example usage:
178 /// ~~~{.cpp}
179 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
180 /// auto filtered = df.Filter(myCut, {"x", "y"});
181 ///
182 /// // String: it must contain valid C++ except that column names can be used instead of variable names
183 /// auto filtered = df.Filter("x*y > 0");
184 /// ~~~
185 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
187 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
188 {
189 RDFInternal::CheckFilter(f);
190 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
191 constexpr auto nColumns = ColTypes_t::list_size;
192 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
193 const auto newColumns =
194 CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
195
197
198 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, newColumns, name);
199 fLoopManager->Book(filterPtr.get());
200 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, newColumns, fDataSource);
201 }
202
203 ////////////////////////////////////////////////////////////////////////////
204 /// \brief Append a filter to the call graph.
205 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
206 /// signalling whether the event has passed the selection (true) or not (false).
207 /// \param[in] name Optional name of this filter. See `Report`.
208 /// \return the filter node of the computation graph.
209 ///
210 /// Refer to the first overload of this method for the full documentation.
211 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
213 {
214 // The sfinae is there in order to pick up the overloaded method which accepts two strings
215 // rather than this template method.
216 return Filter(f, {}, name);
217 }
218
219 ////////////////////////////////////////////////////////////////////////////
220 /// \brief Append a filter to the call graph.
221 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
222 /// signalling whether the event has passed the selection (true) or not (false).
223 /// \param[in] columns Names of the columns/branches in input to the filter function.
224 /// \return the filter node of the computation graph.
225 ///
226 /// Refer to the first overload of this method for the full documentation.
227 template <typename F>
228 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
229 {
   // Convenience overload for braced column lists: the names are copied into a ColumnNames_t
   // and the call is forwarded to a sibling Filter overload, with no filter name supplied.
   // NOTE(review): `f` is passed on as an lvalue here (it is not std::move'd).
230 return Filter(f, ColumnNames_t{columns});
231 }
232
233 ////////////////////////////////////////////////////////////////////////////
234 /// \brief Append a filter to the call graph.
235 /// \param[in] expression The filter expression in C++
236 /// \param[in] name Optional name of this filter. See `Report`.
237 /// \return the filter node of the computation graph.
238 ///
239 /// The expression is just-in-time compiled and used to filter entries. It must
240 /// be valid C++ syntax in which variable names are substituted with the names
241 /// of branches/columns.
242 ///
243 /// ### Example usage:
244 /// ~~~{.cpp}
245 /// auto filtered_df = df.Filter("myCollection.size() > 3");
246 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
247 /// ~~~
249 {
250 // deleted by the jitted call to JitFilterHelper
251 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
252 using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
253 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
254 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(fLoopManager, name);
255
256 RDFInternal::BookFilterJit(jittedFilter.get(), upcastNodeOnHeap, name, expression, fLoopManager->GetAliasMap(),
259
260 fLoopManager->Book(jittedFilter.get());
263 }
264
265 // clang-format off
266 ////////////////////////////////////////////////////////////////////////////
267 /// \brief Creates a custom column
268 /// \param[in] name The name of the custom column.
269 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
270 /// \param[in] columns Names of the columns/branches in input to the producer function.
271 /// \return the first node of the computation graph for which the new quantity is defined.
272 ///
273 /// Create a custom column that will be visible from all subsequent nodes
274 /// of the functional chain. The `expression` is only evaluated for entries that pass
275 /// all the preceding filters.
276 /// A new variable is created called `name`, accessible as if it was contained
277 /// in the dataset from subsequent transformations/actions.
278 ///
279 /// Use cases include:
280 /// * caching the results of complex calculations for easy and efficient multiple access
281 /// * extraction of quantities of interest from complex objects
282 ///
283 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
284 ///
285 /// ### Example usage:
286 /// ~~~{.cpp}
287 /// // assuming a function with signature:
288 /// double myComplexCalculation(const RVec<float> &muon_pts);
289 /// // we can pass it directly to Define
290 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
291 /// // alternatively, we can pass the body of the function as a string, as in Filter:
292 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
293 /// ~~~
294 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
296 {
297 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns);
298 }
299 // clang-format on
300
301 // clang-format off
302 ////////////////////////////////////////////////////////////////////////////
303 /// \brief Creates a custom column with a value dependent on the processing slot.
304 /// \param[in] name The name of the custom column.
305 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
306 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
307 /// \return the first node of the computation graph for which the new quantity is defined.
308 ///
309 /// This alternative implementation of `Define` is meant as a helper in writing thread-safe custom columns.
310 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
311 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
312 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
313 /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1.
314 ///
315 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
316 /// ~~~{.cpp}
317 /// int function(unsigned int, double, double);
318 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
319 /// df.DefineSlot("x", function, {"column1", "column2"})
320 /// ~~~
321 ///
322 /// See Define for more information.
323 template <typename F>
325 {
326 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns);
327 }
328 // clang-format on
329
330 // clang-format off
331 ////////////////////////////////////////////////////////////////////////////
332 /// \brief Creates a custom column with a value dependent on the processing slot and the current entry.
333 /// \param[in] name The name of the custom column.
334 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
335 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
336 /// \return the first node of the computation graph for which the new quantity is defined.
337 ///
338 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
339 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
340 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
341 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
342 /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1. The second parameter
343 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
344 ///
345 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
346 /// ~~~{.cpp}
347 /// int function(unsigned int, ULong64_t, double, double);
348 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
349 /// DefineSlotEntry("x", function, {"column1", "column2"})
350 /// ~~~
351 ///
352 /// See Define for more information.
353 template <typename F>
355 {
356 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns);
357 }
358 // clang-format on
359
360 ////////////////////////////////////////////////////////////////////////////
361 /// \brief Creates a custom column
362 /// \param[in] name The name of the custom column.
363 /// \param[in] expression An expression in C++ which represents the temporary value
364 /// \return the first node of the computation graph for which the new quantity is defined.
365 ///
366 /// The expression is just-in-time compiled and used to produce the column entries.
367 /// It must be valid C++ syntax in which variable names are substituted with the names
368 /// of branches/columns.
369 ///
370 /// Refer to the first overload of this method for the full documentation.
372 {
373 // this check must be done before jitting lest we throw exceptions in jitted code
377
378 auto jittedCustomColumn =
379 std::make_shared<RDFDetail::RJittedCustomColumn>(fLoopManager, name, fLoopManager->GetNSlots());
380
381 RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, jittedCustomColumn, fCustomColumns,
383
385 newCols.AddName(name);
386 newCols.AddColumn(jittedCustomColumn, name);
387
388 fLoopManager->RegisterCustomColumn(jittedCustomColumn.get());
389
390 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
391
392 return newInterface;
393 }
394
395 ////////////////////////////////////////////////////////////////////////////
396 /// \brief Allow referring to a column by a different name
397 /// \param[in] alias name of the column alias
398 /// \param[in] columnName name of the column to be aliased
399 /// \return the first node of the computation graph for which the alias is available.
400 ///
401 /// Aliasing an alias is supported.
402 ///
403 /// ### Example usage:
404 /// ~~~{.cpp}
405 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
406 /// ~~~
408 {
409 // The symmetry with Define is clear. We want to:
410 // - Create globally the alias and return this very node, unchanged
411 // - Make aliases accessible based on chains and not globally
412
413 // Helper to find out if a name is a column
414 auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
415
416 // If the alias name is a column name, there is a problem
418 fLoopManager->GetAliasMap(), dsColumnNames);
419
420 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
421
422 fLoopManager->AddColumnAlias(std::string(alias), validColumnName);
423
425
426 newCols.AddName(alias);
427 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
428
429 return newInterface;
430 }
431
432 ////////////////////////////////////////////////////////////////////////////
433 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
434 /// \tparam ColumnTypes variadic list of branch/column types.
435 /// \param[in] treename The name of the output TTree.
436 /// \param[in] filename The name of the output TFile.
437 /// \param[in] columnList The list of names of the columns/branches to be written.
438 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
439 /// \return a `RDataFrame` that wraps the snapshotted dataset.
440 ///
441 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
442 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
443 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
444 /// written out and it appears before the array in the columnList.
445 ///
446 /// ### Example invocations:
447 ///
448 /// ~~~{.cpp}
449 /// // without specifying template parameters (column types automatically deduced)
450 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
451 ///
452 /// // specifying template parameters ("x" is `int`, "y" is `float`)
453 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
454 /// ~~~
455 ///
456 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
457 /// `RSnapshotOptions`:
458 /// ~~~{.cpp}
459 /// RSnapshotOptions opts;
460 /// opts.fLazy = true;
461 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
462 /// ~~~
463 template <typename... ColumnTypes>
465 Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
466 const RSnapshotOptions &options = RSnapshotOptions())
467 {
468 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
469 }
470
471 ////////////////////////////////////////////////////////////////////////////
472 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
473 /// \param[in] treename The name of the output TTree.
474 /// \param[in] filename The name of the output TFile.
475 /// \param[in] columnList The list of names of the columns/branches to be written.
476 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
477 /// \return a `RDataFrame` that wraps the snapshotted dataset.
478 ///
479 /// This function returns a `RDataFrame` built with the output tree as a source.
480 /// The types of the columns are automatically inferred and do not need to be specified.
481 ///
482 /// See above for a more complete description and example usages.
484 const ColumnNames_t &columnList,
485 const RSnapshotOptions &options = RSnapshotOptions())
486 {
487 // Early return: if the list of columns is empty, just return an empty RDF
488 // If we proceed, the jitted call will not compile!
489 if (columnList.empty()) {
490 auto nEntries = *this->Count();
491 auto snapshotRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(nEntries));
492 return MakeResultPtr(snapshotRDF, *fLoopManager, nullptr);
493 }
494 auto tree = fLoopManager->GetTree();
495 const auto nsID = fLoopManager->GetID();
496 std::stringstream snapCall;
497 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
498 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
500
501 // build a string equivalent to
502 // "resPtr = (RInterface<nodetype*>*)(this)->Snapshot<Ts...>(args...)"
504 snapCall << "*reinterpret_cast<ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>>*>("
506 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
507 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Snapshot<";
508
509 const auto &customCols = fCustomColumns.GetNames();
510 const auto dontConvertVector = false;
511
512 const auto validColumnNames = GetValidatedColumnNames(columnList.size(), columnList);
513
514 for (auto &c : validColumnNames) {
515 const auto isCustom = std::find(customCols.begin(), customCols.end(), c) != customCols.end();
516 const auto customColID = isCustom ? fCustomColumns.GetColumns().at(c)->GetID() : 0;
517 snapCall << RDFInternal::ColumnName2ColumnTypeName(c, nsID, tree, fDataSource, isCustom, dontConvertVector,
518 customColID)
519 << ", ";
520 };
521 if (!columnList.empty())
522 snapCall.seekp(-2, snapCall.cur); // remove the last ",
523 snapCall << ">(\"" << treename << "\", \"" << filename << "\", "
524 << "*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
525 << RDFInternal::PrettyPrintAddr(&columnList) << "),"
526 << "*reinterpret_cast<ROOT::RDF::RSnapshotOptions*>(" << RDFInternal::PrettyPrintAddr(&options) << "));";
527 // jit snapCall, return result
528 fLoopManager->JitDeclarations(); // some type aliases might be needed by the code jitted in the next line
529 RDFInternal::InterpreterCalc(snapCall.str(), "Snapshot");
530 return resPtr;
531 }
532
533 // clang-format off
534 ////////////////////////////////////////////////////////////////////////////
535 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
536 /// \param[in] treename The name of the output TTree.
537 /// \param[in] filename The name of the output TFile.
538 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
539 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
540 /// \return a `RDataFrame` that wraps the snapshotted dataset.
541 ///
542 /// This function returns a `RDataFrame` built with the output tree as a source.
543 /// The types of the columns are automatically inferred and do not need to be specified.
544 ///
545 /// See above for a more complete description and example usages.
547 std::string_view columnNameRegexp = "",
548 const RSnapshotOptions &options = RSnapshotOptions())
549 {
553 columnNameRegexp,
554 "Snapshot");
555 return Snapshot(treename, filename, selectedColumns, options);
556 }
557 // clang-format on
558
559 // clang-format off
560 ////////////////////////////////////////////////////////////////////////////
561 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
562 /// \param[in] treename The name of the output TTree.
563 /// \param[in] filename The name of the output TFile.
564 /// \param[in] columnList The list of names of the columns/branches to be written.
565 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
566 /// \return a `RDataFrame` that wraps the snapshotted dataset.
567 ///
568 /// This function returns a `RDataFrame` built with the output tree as a source.
569 /// The types of the columns are automatically inferred and do not need to be specified.
570 ///
571 /// See above for a more complete description and example usages.
573 std::initializer_list<std::string> columnList,
574 const RSnapshotOptions &options = RSnapshotOptions())
575 {
576 ColumnNames_t selectedColumns(columnList);
577 return Snapshot(treename, filename, selectedColumns, options);
578 }
579 // clang-format on
580
581 ////////////////////////////////////////////////////////////////////////////
582 /// \brief Save selected columns in memory
583 /// \tparam ColumnTypes variadic list of branch/column types.
584 /// \param[in] columnList Names of the columns to be cached in memory.
585 /// \return a `RDataFrame` that wraps the cached dataset.
586 ///
587 /// This action returns a new `RDataFrame` object, completely detached from
588 /// the originating `RDataFrame`. The new dataframe only contains the cached
589 /// columns and stores their content in memory for fast, zero-copy subsequent access.
590 ///
591 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
592 /// fits in memory and that will be accessed many times.
593 ///
594 /// ### Example usage:
595 ///
596 /// **Types and columns specified:**
597 /// ~~~{.cpp}
598 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
599 /// ~~~
600 ///
601 /// **Types inferred and columns specified (this invocation relies on jitting):**
602 /// ~~~{.cpp}
603 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
604 /// ~~~
605 ///
606 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
607 /// ~~~{.cpp}
608 /// auto cache_all_cols_df = df.Cache(myRegexp);
609 /// ~~~
610 template <typename... ColumnTypes>
612 {
613 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
614 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
615 }
616
617 ////////////////////////////////////////////////////////////////////////////
618 /// \brief Save selected columns in memory
619 /// \param[in] columnList Names of the columns to be cached in memory.
620 /// \return a `RDataFrame` that wraps the cached dataset.
621 ///
622 /// See the previous overloads for more information.
624 {
625 // Early return: if the list of columns is empty, just return an empty RDF
626 // If we proceed, the jitted call will not compile!
627 if (columnList.empty()) {
628 auto nEntries = *this->Count();
629 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
630 return emptyRDF;
631 }
632
633 auto tree = fLoopManager->GetTree();
634 const auto nsID = fLoopManager->GetID();
635 std::stringstream cacheCall;
636 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
637 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
639 // build a string equivalent to
640 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
641 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
642 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
644 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
645 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
646
647 const auto &customCols = fCustomColumns.GetNames();
648 for (auto &c : columnList) {
649 const auto isCustom = std::find(customCols.begin(), customCols.end(), c) != customCols.end();
650 const auto customColID = isCustom ? fCustomColumns.GetColumns().at(c)->GetID() : 0;
651 cacheCall << RDFInternal::ColumnName2ColumnTypeName(c, nsID, tree, fDataSource, isCustom,
652 /*vector2rvec=*/true, customColID)
653 << ", ";
654 };
655 if (!columnList.empty())
656 cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
657 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
658 << RDFInternal::PrettyPrintAddr(&columnList) << "));";
659 // jit cacheCall, return result
660 fLoopManager->JitDeclarations(); // some type aliases might be needed by the code jitted in the next line
661 RDFInternal::InterpreterCalc(cacheCall.str(), "Cache");
662 return resRDF;
663 }
664
665 ////////////////////////////////////////////////////////////////////////////
666 /// \brief Save selected columns in memory
667 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
668 /// \return a `RDataFrame` that wraps the cached dataset.
669 ///
670 /// The existing columns are matched against the regular expression. If the string provided
671 /// is empty, all columns are selected. See the previous overloads for more information.
673 {
674
676 columnNameRegexp, "Cache");
677 return Cache(selectedColumns);
678 }
679
680 ////////////////////////////////////////////////////////////////////////////
681 /// \brief Save selected columns in memory
682 /// \param[in] columnList Names of the columns to be cached in memory.
683 /// \return a `RDataFrame` that wraps the cached dataset.
684 ///
685 /// See the previous overloads for more information.
686 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
687 {
   // Convenience overload for braced column lists: the names are copied into a named
   // ColumnNames_t, and the call is delegated to the Cache overload selected for that argument.
688 ColumnNames_t selectedColumns(columnList);
689 return Cache(selectedColumns);
690 }
691
692 // clang-format off
693 ////////////////////////////////////////////////////////////////////////////
694 /// \brief Creates a node that filters entries based on range: [begin, end)
695 /// \param[in] begin Initial entry number considered for this range.
696 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
697 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
698 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
699 ///
700 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
701 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
702 ///
703 /// ### Example usage:
704 /// ~~~{.cpp}
705 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
706 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
707 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
708 /// ~~~
709 // clang-format on
710 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
711 {
712 // check invariants
// A zero stride is rejected. `end == 0` is exempt from the ordering check, and
// `end == begin` passes it: only `end < begin` throws.
713 if (stride == 0 || (end != 0 && end < begin))
714 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
// Presumably rejects the call when implicit multi-threading is enabled (ranges are
// documented as single-thread only) -- confirm against CheckIMTDisabled.
715 CheckIMTDisabled("Range");
716
// NOTE(review): listing lines 717 and 720 (the definitions of `Range_t` and `tdf_r`)
// are missing from this extract; restore them from the upstream header.
// The new range node is built on top of the current node (fProxiedPtr) and
// registered with the loop manager.
718 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
719 fLoopManager->Book(rangePtr.get());
721 return tdf_r;
722 }
723
724 // clang-format off
725 ////////////////////////////////////////////////////////////////////////////
726 /// \brief Creates a node that filters entries based on range
727 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
728 /// \return a node of the computation graph for which the range is defined.
729 ///
730 /// See the other Range overload for a detailed description.
731 // clang-format on
732 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
733
734 // clang-format off
735 ////////////////////////////////////////////////////////////////////////////
736 /// \brief Execute a user-defined function on each entry (*instant action*)
737 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
738 /// \param[in] columns Names of the columns/branches in input to the user function.
739 ///
740 /// The callable `f` is invoked once per entry. This is an *instant action*:
741 /// upon invocation, an event loop as well as execution of all scheduled actions
742 /// is triggered.
743 /// Users are responsible for the thread-safety of this callable when executing
744 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
745 ///
746 /// ### Example usage:
747 /// ~~~{.cpp}
748 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
749 /// ~~~
750 // clang-format on
751 template <typename F>
752 void Foreach(F f, const ColumnNames_t &columns = {})
753 {
754 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
755 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
756 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
757 }
758
759 // clang-format off
760 ////////////////////////////////////////////////////////////////////////////
761 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*)
762 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
763 /// \param[in] columns Names of the columns/branches in input to the user function.
764 ///
765 /// Same as `Foreach`, but the user-defined function takes an extra
766 /// `unsigned int` as its first parameter, the *processing slot index*.
767 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
768 /// for each thread of execution.
769 /// This is meant as a helper in writing thread-safe `Foreach`
770 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
771 /// The user-defined processing callable is able to follow different
772 /// *streams of processing* indexed by the first parameter.
773 /// `ForeachSlot` works just as well with single-thread execution: in that
774 /// case `slot` will always be `0`.
775 ///
776 /// ### Example usage:
777 /// ~~~{.cpp}
778 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
779 /// ~~~
780 // clang-format on
781 template <typename F>
782 void ForeachSlot(F f, const ColumnNames_t &columns = {})
783 {
// Column types are the callable's argument types with the first one removed
// (documented above as the `unsigned int` slot index).
784 using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
785 constexpr auto nColumns = ColTypes_t::list_size;
786
787 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
788
789 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
790
791 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
// NOTE(review): listing line 792 (the definition of `Action_t`) is missing from this
// extract; restore it from the upstream header.
793
// The action is owned by this local unique_ptr; the loop manager only receives a raw pointer.
794 auto action =
795 std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, std::move(newColumns));
796 fLoopManager->Book(action.get());
797
// Instant action: the event loop is started right away, while `action` is still alive.
798 fLoopManager->Run();
799 }
800
801 // clang-format off
802 ////////////////////////////////////////////////////////////////////////////
803 /// \brief Execute a user-defined reduce operation on the values of a column.
804 /// \tparam F The type of the reduce callable. Automatically deduced.
805 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
806 /// \param[in] f A callable with signature `T(T,T)`
807 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
808 /// \return the reduced quantity wrapped in a `RResultPtr`.
809 ///
810 /// A reduction takes two values of a column and merges them into one (e.g.
811 /// by summing them, taking the maximum, etc). This action performs the
812 /// specified reduction operation on all processed column values, returning
813 /// a single value of the same type. The callable f must satisfy the general
814 /// requirements of a *processing function* besides having signature `T(T,T)`
815 /// where `T` is the type of column columnName.
816 ///
817 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
818 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
819 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
820 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
821 /// overload.
822 ///
823 /// ### Example usage:
824 /// ~~~{.cpp}
825 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
826 /// ~~~
827 ///
828 /// This action is *lazy*: upon invocation of this method the calculation is
829 /// booked but not executed. See RResultPtr documentation.
830 // clang-format on
831 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
// NOTE(review): listing line 832 (the declaration of this Reduce overload) is missing
// from this extract; restore it from the upstream header.
833 {
// Compile-time guard: this overload needs a default-constructible T because it passes
// T() as the initial value below.
834 static_assert(
835 std::is_default_constructible<T>::value,
836 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
// Forward to the three-argument overload with a default-constructed T as the initial value.
837 return Reduce(std::move(f), columnName, T());
838 }
839
840 ////////////////////////////////////////////////////////////////////////////
841 /// \brief Execute a user-defined reduce operation on the values of a column.
842 /// \tparam F The type of the reduce callable. Automatically deduced.
843 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
844 /// \param[in] f A callable with signature `T(T,T)`
845 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
846 /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
847 /// \return the reduced quantity wrapped in a `RResultPtr`.
848 ///
849 /// ### Example usage:
850 /// ~~~{.cpp}
851 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
852 /// ~~~
853 /// See the description of the first Reduce overload for more information.
854 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
855 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
856 {
857 return Aggregate(f, f, columnName, redIdentity);
858 }
859
860 ////////////////////////////////////////////////////////////////////////////
861 /// \brief Return the number of entries processed (*lazy action*)
862 /// \return the number of entries wrapped in a `RResultPtr`.
863 ///
864 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
865 /// This action is *lazy*: upon invocation of this method the calculation is
866 /// booked but not executed. See RResultPtr documentation.
867 ///
868 /// ### Example usage:
869 /// ~~~{.cpp}
870 /// auto nEntriesAfterCuts = myFilteredDf.Count();
871 /// ~~~
872 ///
874 {
// NOTE(review): listing lines 873 (the declaration of Count) and 878 (the definition of
// `Action_t`) are missing from this extract; restore them from the upstream header.
875 const auto nSlots = fLoopManager->GetNSlots();
// Shared counter, initialised to 0, that both the helper and the returned result refer to.
876 auto cSPtr = std::make_shared<ULong64_t>(0);
877 using Helper_t = RDFInternal::CountHelper;
// The action is booked with an empty column list.
// NOTE(review): `std::move(fCustomColumns)` is applied to a data member; if Action_t's
// constructor takes that argument by value or rvalue reference, this node's
// fCustomColumns is left moved-from -- confirm against the constructor signature.
879 auto action =
880 std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr, std::move(fCustomColumns));
881 fLoopManager->Book(action.get());
// Ownership of the action is handed to the result pointer.
882 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
883 }
884
885 ////////////////////////////////////////////////////////////////////////////
886 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default)
887 /// \tparam T The type of the column.
888 /// \tparam COLL The type of collection used to store the values.
889 /// \param[in] column The name of the column to collect the values of.
890 /// \return the content of the selected column wrapped in a `RResultPtr`.
891 ///
892 /// The collection type to be specified for C-style array columns is `RVec<T>`:
893 /// in this case the returned collection is a `std::vector<RVec<T>>`.
894 /// ### Example usage:
895 /// ~~~{.cpp}
896 /// // In this case intCol is a std::vector<int>
897 /// auto intCol = rdf.Take<int>("integerColumn");
898 /// // Same content as above but in this case taken as a RVec<int>
899 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
900 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
901 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
902 /// ~~~
903 /// This action is *lazy*: upon invocation of this method the calculation is
904 /// booked but not executed. See RResultPtr documentation.
905 template <typename T, typename COLL = std::vector<T>>
// NOTE(review): listing lines 906 (the declaration of Take) and 915 (the definition of
// `Action_t`) are missing from this extract; restore them from the upstream header.
907 {
// An empty `column` yields an empty list, leaving the choice of column to GetValidatedColumnNames.
908 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
909
910 const auto validColumnNames = GetValidatedColumnNames(1, columns);
911
912 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<1>(), TTraits::TypeList<T>());
913
914 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
// The collection that will hold the result; shared between the helper and the returned pointer.
916 auto valuesPtr = std::make_shared<COLL>();
917 const auto nSlots = fLoopManager->GetNSlots();
918
919 auto action =
920 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, std::move(newColumns));
921 fLoopManager->Book(action.get());
// Ownership of the action is handed to the result pointer.
922 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
923 }
924
925 ////////////////////////////////////////////////////////////////////////////
926 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
927 /// \tparam V The type of the column used to fill the histogram.
928 /// \param[in] model The returned histogram will be constructed using this as a model.
929 /// \param[in] vName The name of the column that will fill the histogram.
930 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
931 ///
932 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
933 /// is filled with each one of the elements of the container. In case multiple columns of container type
934 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
935 /// possibly different lengths between events).
936 /// This action is *lazy*: upon invocation of this method the calculation is
937 /// booked but not executed. See RResultPtr documentation.
938 ///
939 /// ### Example usage:
940 /// ~~~{.cpp}
941 /// // Deduce column type (this invocation needs jitting internally)
942 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
943 /// // Explicit column type
944 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
945 /// ~~~
946 ///
947 template <typename V = RDFDetail::RInferredType>
948 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
949 {
950 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
951
952 const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
953
954 std::shared_ptr<::TH1D> h(nullptr);
955 {
956 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
957 h = model.GetHistogram();
958 h->SetDirectory(nullptr);
959 }
960
961 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
962 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
963 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h);
964 }
965
966 ////////////////////////////////////////////////////////////////////////////
967 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
968 /// \tparam V The type of the column used to fill the histogram.
969 /// \param[in] vName The name of the column that will fill the histogram.
970 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
971 ///
972 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
973 /// The "name" and "title" strings are built starting from the input column name.
974 /// See the description of the first Histo1D overload for more details.
975 ///
976 /// ### Example usage:
977 /// ~~~{.cpp}
978 /// // Deduce column type (this invocation needs jitting internally)
979 /// auto myHist1 = myDf.Histo1D("myColumn");
980 /// // Explicit column type
981 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
982 /// ~~~
983 ///
984 template <typename V = RDFDetail::RInferredType>
// NOTE(review): listing line 985 (the declaration of this Histo1D overload) is missing
// from this extract; restore it from the upstream header.
986 {
// The histogram name is the column name; the title string is "<name>;<name>;".
987 const auto h_name = std::string(vName);
988 const auto h_title = h_name + ";" + h_name + ";";
// Delegate to the model-based overload with a 128-bin model whose x limits are both 0.
989 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
990 }
991
992 ////////////////////////////////////////////////////////////////////////////
993 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
994 /// \tparam V The type of the column used to fill the histogram.
995 /// \tparam W The type of the column used as weights.
996 /// \param[in] model The returned histogram will be constructed using this as a model.
997 /// \param[in] vName The name of the column that will fill the histogram.
998 /// \param[in] wName The name of the column that will provide the weights.
999 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1000 ///
1001 /// See the description of the first Histo1D overload for more details.
1002 ///
1003 /// ### Example usage:
1004 /// ~~~{.cpp}
1005 /// // Deduce column type (this invocation needs jitting internally)
1006 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1007 /// // Explicit column type
1008 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1009 /// ~~~
1010 ///
1011 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
// NOTE(review): listing line 1012 (the declaration of this weighted Histo1D overload) is
// missing from this extract; restore it from the upstream header.
1013 {
// If either column name is empty, an empty column list is passed on instead of a partial one.
1014 const std::vector<std::string_view> columnViews = {vName, wName};
1015 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1016 ? ColumnNames_t()
1017 : ColumnNames_t(columnViews.begin(), columnViews.end());
1018 std::shared_ptr<::TH1D> h(nullptr);
1019 {
// The guard object (constructed with kError) is alive only while the histogram is created.
1020 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1021 h = model.GetHistogram();
1022 }
// NOTE(review): unlike the unweighted (model, vName) overload, this one does not call
// SetDirectory(nullptr), GetValidatedColumnNames or SetCanExtendAllAxes -- confirm that
// the difference is intended.
1023 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h);
1024 }
1025
1026 ////////////////////////////////////////////////////////////////////////////
1027 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1028 /// \tparam V The type of the column used to fill the histogram.
1029 /// \tparam W The type of the column used as weights.
1030 /// \param[in] vName The name of the column that will fill the histogram.
1031 /// \param[in] wName The name of the column that will provide the weights.
1032 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1033 ///
1034 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1035 /// The "name" and "title" strings are built starting from the input column names.
1036 /// See the description of the first Histo1D overload for more details.
1037 ///
1038 /// ### Example usage:
1039 /// ~~~{.cpp}
1040 /// // Deduce column types (this invocation needs jitting internally)
1041 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1042 /// // Explicit column types
1043 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1044 /// ~~~
1045 ///
1046 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
// NOTE(review): listing line 1047 (the declaration of this Histo1D overload) is missing
// from this extract; restore it from the upstream header.
1048 {
1049 // We build name and title based on the value and weight column names
// The name is "<value>*<weight>"; the title string is "<name>;<name>;".
1050 const auto h_name = std::string(vName) + "*" + std::string(wName);
1051 const auto h_title = h_name + ";" + h_name + ";";
// Delegate to the weighted model-based overload with a 128-bin model whose x limits are both 0.
1052 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1053 }
1054
1055 ////////////////////////////////////////////////////////////////////////////
1056 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1057 /// \tparam V The type of the column used to fill the histogram.
1058 /// \tparam W The type of the column used as weights.
1059 /// \param[in] model The returned histogram will be constructed using this as a model.
1060 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1061 ///
1062 /// This overload will use the first two default columns as column names.
1063 /// See the description of the first Histo1D overload for more details.
1064 template <typename V, typename W>
1065 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1066 {
1067 return Histo1D<V, W>(model, "", "");
1068 }
1069
1070 ////////////////////////////////////////////////////////////////////////////
1071 /// \brief Fill and return a two-dimensional histogram (*lazy action*)
1072 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1073 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1074 /// \param[in] model The returned histogram will be constructed using this as a model.
1075 /// \param[in] v1Name The name of the column that will fill the x axis.
1076 /// \param[in] v2Name The name of the column that will fill the y axis.
1077 /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1078 ///
1079 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1080 /// is filled with each one of the elements of the container. In case multiple columns of container type
1081 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1082 /// possibly different lengths between events).
1083 /// This action is *lazy*: upon invocation of this method the calculation is
1084 /// booked but not executed. See RResultPtr documentation.
1085 ///
1086 /// ### Example usage:
1087 /// ~~~{.cpp}
1088 /// // Deduce column types (this invocation needs jitting internally)
1089 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1090 /// // Explicit column types
1091 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1092 /// ~~~
1093 ///
1094 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
// NOTE(review): listing line 1095 (the declaration of this Histo2D overload) is missing
// from this extract; restore it from the upstream header.
1096 {
1097 std::shared_ptr<::TH2D> h(nullptr);
1098 {
// The guard object (constructed with kError) is alive only while the histogram is created.
1099 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1100 h = model.GetHistogram();
1101 }
// Models without axis limits are rejected up front with a std::runtime_error.
1102 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1103 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1104 }
// If either column name is empty, an empty column list is passed on instead of a partial one.
1105 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1106 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1107 ? ColumnNames_t()
1108 : ColumnNames_t(columnViews.begin(), columnViews.end());
1109 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h);
1110 }
1111
1112 ////////////////////////////////////////////////////////////////////////////
1113 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*)
1114 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1115 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1116 /// \tparam W The type of the column used for the weights of the histogram.
1117 /// \param[in] model The returned histogram will be constructed using this as a model.
1118 /// \param[in] v1Name The name of the column that will fill the x axis.
1119 /// \param[in] v2Name The name of the column that will fill the y axis.
1120 /// \param[in] wName The name of the column that will provide the weights.
1121 /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1122 ///
1123 /// This action is *lazy*: upon invocation of this method the calculation is
1124 /// booked but not executed. See RResultPtr documentation.
1125 /// The user gives up ownership of the model histogram.
1126 ///
1127 /// ### Example usage:
1128 /// ~~~{.cpp}
1129 /// // Deduce column types (this invocation needs jitting internally)
1130 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1131 /// // Explicit column types
1132 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1133 /// ~~~
1134 ///
1135 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1136 typename W = RDFDetail::RInferredType>
// NOTE(review): listing lines 1137-1138 (the declaration of this weighted Histo2D overload)
// are missing from this extract; restore them from the upstream header.
1139 {
1140 std::shared_ptr<::TH2D> h(nullptr);
1141 {
// The guard object (constructed with kError) is alive only while the histogram is created.
1142 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1143 h = model.GetHistogram();
1144 }
// Models without axis limits are rejected up front with a std::runtime_error.
1145 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1146 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1147 }
// If any of the three column names is empty, an empty column list is passed on.
1148 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1149 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1150 ? ColumnNames_t()
1151 : ColumnNames_t(columnViews.begin(), columnViews.end());
1152 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h);
1153 }
1154
1155 template <typename V1, typename V2, typename W>
// NOTE(review): listing line 1156 (the declaration of this model-only Histo2D overload)
// is missing from this extract; restore it from the upstream header.
1157 {
// Forward to the weighted overload with all three column names left empty.
1158 return Histo2D<V1, V2, W>(model, "", "", "");
1159 }
1160
1161 ////////////////////////////////////////////////////////////////////////////
1162 /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1163 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1164 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1165 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1166 /// \param[in] model The returned histogram will be constructed using this as a model.
1167 /// \param[in] v1Name The name of the column that will fill the x axis.
1168 /// \param[in] v2Name The name of the column that will fill the y axis.
1169 /// \param[in] v3Name The name of the column that will fill the z axis.
1170 /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1171 ///
1172 /// This action is *lazy*: upon invocation of this method the calculation is
1173 /// booked but not executed. See RResultPtr documentation.
1174 ///
1175 /// ### Example usage:
1176 /// ~~~{.cpp}
1177 /// // Deduce column types (this invocation needs jitting internally)
1178 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1179 /// "myValueX", "myValueY", "myValueZ");
1180 /// // Explicit column types
1181 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1182 /// "myValueX", "myValueY", "myValueZ");
1183 /// ~~~
1184 ///
1185 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1186 typename V3 = RDFDetail::RInferredType>
// NOTE(review): listing line 1187 (the first line of this Histo3D declaration) is missing
// from this extract; only the trailing parameter below survived. Restore it from upstream.
1188 std::string_view v3Name = "")
1189 {
1190 std::shared_ptr<::TH3D> h(nullptr);
1191 {
// The guard object (constructed with kError) is alive only while the histogram is created.
1192 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1193 h = model.GetHistogram();
1194 }
// Models without axis limits are rejected up front with a std::runtime_error.
1195 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1196 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1197 }
// If any of the three column names is empty, an empty column list is passed on.
1198 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1199 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1200 ? ColumnNames_t()
1201 : ColumnNames_t(columnViews.begin(), columnViews.end());
1202 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h);
1203 }
1204
1205 ////////////////////////////////////////////////////////////////////////////
1206 /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1207 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1208 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1209 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1210 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1211 /// \param[in] model The returned histogram will be constructed using this as a model.
1212 /// \param[in] v1Name The name of the column that will fill the x axis.
1213 /// \param[in] v2Name The name of the column that will fill the y axis.
1214 /// \param[in] v3Name The name of the column that will fill the z axis.
1215 /// \param[in] wName The name of the column that will provide the weights.
1216 /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1217 ///
1218 /// This action is *lazy*: upon invocation of this method the calculation is
1219 /// booked but not executed. See RResultPtr documentation.
1220 ///
1221 /// ### Example usage:
1222 /// ~~~{.cpp}
1223 /// // Deduce column types (this invocation needs jitting internally)
1224 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1225 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1226 /// // Explicit column types
1227 /// using d_t = double;
1228 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1229 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1230 /// ~~~
1231 ///
1232 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1233 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
// NOTE(review): listing line 1234 (the first line of this weighted Histo3D declaration) is
// missing from this extract; only the trailing parameters below survived. Restore it from upstream.
1235 std::string_view v3Name, std::string_view wName)
1236 {
1237 std::shared_ptr<::TH3D> h(nullptr);
1238 {
// The guard object (constructed with kError) is alive only while the histogram is created.
1239 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1240 h = model.GetHistogram();
1241 }
// Models without axis limits are rejected up front with a std::runtime_error.
1242 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1243 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1244 }
// If any of the four column names is empty, an empty column list is passed on.
1245 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1246 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1247 ? ColumnNames_t()
1248 : ColumnNames_t(columnViews.begin(), columnViews.end());
1249 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h);
1250 }
1251
1252 template <typename V1, typename V2, typename V3, typename W>
// NOTE(review): listing line 1253 (the declaration of this model-only Histo3D overload)
// is missing from this extract; restore it from the upstream header.
1254 {
// Forward to the weighted overload with all four column names left empty.
1255 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1256 }
1257
1258 ////////////////////////////////////////////////////////////////////////////
1259 /// \brief Fill and return a graph (*lazy action*)
1260 /// \tparam V1 The type of the column used to fill the x axis of the graph.
1261 /// \tparam V2 The type of the column used to fill the y axis of the graph.
1262 /// \param[in] v1Name The name of the column that will fill the x axis.
1263 /// \param[in] v2Name The name of the column that will fill the y axis.
1264 /// \return the graph wrapped in a `RResultPtr`.
1265 ///
1266 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1267 /// is filled with each one of the elements of the container.
1268 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1269 /// If the graph is going to be drawn, it is advisable to sort its points along x first.
1270 /// The graph is given a name and a title based on the input column names.
1271 ///
1272 /// This action is *lazy*: upon invocation of this method the calculation is
1273 /// booked but not executed. See RResultPtr documentation.
1274 ///
1275 /// ### Example usage:
1276 /// ~~~{.cpp}
1277 /// // Deduce column types (this invocation needs jitting internally)
1278 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1279 /// // Explicit column types
1280 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1281 /// ~~~
1282 ///
1283 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
// NOTE(review): listing line 1284 (the declaration of Graph) is missing from this extract;
// restore it from the upstream header.
1285 {
1286 auto graph = std::make_shared<::TGraph>();
// If either column name is empty, an empty column list is handed to the validation step.
1287 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1288 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1289 ? ColumnNames_t()
1290 : ColumnNames_t(columnViews.begin(), columnViews.end());
1291
1292 const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1293
1294 // We build a default name and title based on the input columns
// The condition tests the validated names, but the strings below are built from the
// caller-supplied v1Name/v2Name.
1295 if (!(validatedColumns[0].empty() && validatedColumns[1].empty())) {
1296 const auto v2Name_str = std::string(v2Name);
1297 const auto g_name = std::string(v1Name) + "*" + v2Name_str;
1298 graph->SetNameTitle(g_name.c_str(), g_name.c_str());
// NOTE(review): the x-axis title is set to the combined "<v1>*<v2>" name rather than to
// v1Name alone, while the y-axis title is v2Name -- looks unintended, confirm.
1299 graph->GetXaxis()->SetTitle(g_name.c_str());
1300 graph->GetYaxis()->SetTitle(v2Name_str.c_str());
1301 }
1302
1303 return CreateAction<RDFInternal::ActionTags::Graph, V1, V2>(validatedColumns, graph);
1304 }
1305
1306 ////////////////////////////////////////////////////////////////////////////
1307 /// \brief Fill and return a one-dimensional profile (*lazy action*)
1308 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1309 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1310 /// \param[in] model The model to be considered to build the new return value.
1311 /// \param[in] v1Name The name of the column that will fill the x axis.
1312 /// \param[in] v2Name The name of the column that will fill the y axis.
1313 /// \return the monodimensional profile wrapped in a `RResultPtr`.
1314 ///
1315 /// This action is *lazy*: upon invocation of this method the calculation is
1316 /// booked but not executed. See RResultPtr documentation.
1317 ///
1318 /// ### Example usage:
1319 /// ~~~{.cpp}
1320 /// // Deduce column types (this invocation needs jitting internally)
1321 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1322 /// // Explicit column types
1323 /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1324 /// ~~~
1325 ///
1326 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1328 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1329 {
1330 std::shared_ptr<::TProfile> h(nullptr);
1331 {
1332 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1333 h = model.GetProfile();
1334 }
1335
1336 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1337 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1338 }
1339 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1340 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1341 ? ColumnNames_t()
1342 : ColumnNames_t(columnViews.begin(), columnViews.end());
1343 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h);
1344 }
1345
1346 ////////////////////////////////////////////////////////////////////////////
1347 /// \brief Fill and return a one-dimensional profile (*lazy action*)
1348 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1349 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1350 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1351 /// \param[in] model The model to be considered to build the new return value.
1352 /// \param[in] v1Name The name of the column that will fill the x axis.
1353 /// \param[in] v2Name The name of the column that will fill the y axis.
1354 /// \param[in] wName The name of the column that will provide the weights.
1355 /// \return the monodimensional profile wrapped in a `RResultPtr`.
1356 ///
1357 /// This action is *lazy*: upon invocation of this method the calculation is
1358 /// booked but not executed. See RResultPtr documentation.
1359 ///
1360 /// ### Example usage:
1361 /// ~~~{.cpp}
1362 /// // Deduce column types (this invocation needs jitting internally)
1363 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
1364 /// // Explicit column types
1365 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
1366 /// "xValues", "yValues", "weight");
1367 /// ~~~
1368 ///
1369 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1370 typename W = RDFDetail::RInferredType>
1373 {
1374 std::shared_ptr<::TProfile> h(nullptr);
1375 {
1376 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1377 h = model.GetProfile();
1378 }
1379
1380 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1381 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1382 }
1383 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1384 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1385 ? ColumnNames_t()
1386 : ColumnNames_t(columnViews.begin(), columnViews.end());
1387 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h);
1388 }
1389
1390 template <typename V1, typename V2, typename W>
1392 {
1393 return Profile1D<V1, V2, W>(model, "", "", "");
1394 }
1395
1396 ////////////////////////////////////////////////////////////////////////////
1397 /// \brief Fill and return a two-dimensional profile (*lazy action*)
1398 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1399 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1400 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1401 /// \param[in] model The returned profile will be constructed using this as a model.
1402 /// \param[in] v1Name The name of the column that will fill the x axis.
1403 /// \param[in] v2Name The name of the column that will fill the y axis.
1404 /// \param[in] v3Name The name of the column that will fill the z axis.
1405 /// \return the bidimensional profile wrapped in a `RResultPtr`.
1406 ///
1407 /// This action is *lazy*: upon invocation of this method the calculation is
1408 /// booked but not executed. See RResultPtr documentation.
1409 ///
1410 /// ### Example usage:
1411 /// ~~~{.cpp}
1412 /// // Deduce column types (this invocation needs jitting internally)
1413 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1414 /// "xValues", "yValues", "zValues");
1415 /// // Explicit column types
1416 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1417 /// "xValues", "yValues", "zValues");
1418 /// ~~~
1419 ///
1420 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1421 typename V3 = RDFDetail::RInferredType>
1423 std::string_view v2Name = "", std::string_view v3Name = "")
1424 {
1425 std::shared_ptr<::TProfile2D> h(nullptr);
1426 {
1427 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1428 h = model.GetProfile();
1429 }
1430
1431 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1432 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1433 }
1434 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1435 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1436 ? ColumnNames_t()
1437 : ColumnNames_t(columnViews.begin(), columnViews.end());
1438 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h);
1439 }
1440
1441 ////////////////////////////////////////////////////////////////////////////
1442 /// \brief Fill and return a two-dimensional profile (*lazy action*)
1443 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1444 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1445 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1446 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1447 /// \param[in] model The returned histogram will be constructed using this as a model.
1448 /// \param[in] v1Name The name of the column that will fill the x axis.
1449 /// \param[in] v2Name The name of the column that will fill the y axis.
1450 /// \param[in] v3Name The name of the column that will fill the z axis.
1451 /// \param[in] wName The name of the column that will provide the weights.
1452 /// \return the bidimensional profile wrapped in a `RResultPtr`.
1453 ///
1454 /// This action is *lazy*: upon invocation of this method the calculation is
1455 /// booked but not executed. See RResultPtr documentation.
1456 ///
1457 /// ### Example usage:
1458 /// ~~~{.cpp}
1459 /// // Deduce column types (this invocation needs jitting internally)
1460 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1461 /// "xValues", "yValues", "zValues", "weight");
1462 /// // Explicit column types
1463 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1464 /// "xValues", "yValues", "zValues", "weight");
1465 /// ~~~
1466 ///
1467 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1468 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1470 std::string_view v3Name, std::string_view wName)
1471 {
1472 std::shared_ptr<::TProfile2D> h(nullptr);
1473 {
1474 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1475 h = model.GetProfile();
1476 }
1477
1478 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1479 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1480 }
1481 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1482 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1483 ? ColumnNames_t()
1484 : ColumnNames_t(columnViews.begin(), columnViews.end());
1485 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h);
1486 }
1487
1488 template <typename V1, typename V2, typename V3, typename W>
1490 {
1491 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
1492 }
1493
1494 ////////////////////////////////////////////////////////////////////////////
1495 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1496 ///
1497 /// T must be a type that provides a copy- or move-constructor and a `T::Fill` method that takes as many arguments
1498 /// as the column names pass as columnList. The arguments of `T::Fill` must have type equal to the one of the
1499 /// specified columns (these types are passed as template parameters to this method).
1500 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object.
1501 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1502 /// \tparam T The type of the object to fill. Automatically deduced.
1503 /// \param[in] model The model to be considered to build the new return value.
1504 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1505 /// \return the filled object wrapped in a `RResultPtr`.
1506 ///
1507 /// The user gives up ownership of the model object.
1508 /// The list of column names to be used for filling must always be specified.
1509 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1510 /// See RResultPtr documentation.
1511 ///
1512 /// ### Example usage:
1513 /// ~~~{.cpp}
1514 /// MyClass obj;
1515 /// auto myFilledObj = myDf.Fill<float>(obj, {"col0", "col1"});
1516 /// ~~~
1517 ///
1518 template <typename FirstColumn, typename... OtherColumns, typename T> // need FirstColumn to disambiguate overloads
1519 RResultPtr<T> Fill(T &&model, const ColumnNames_t &columnList)
1520 {
1521 auto h = std::make_shared<T>(std::forward<T>(model));
1522 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1523 throw std::runtime_error("The absence of axes limits is not supported yet.");
1524 }
1525 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h);
1526 }
1527
1528 ////////////////////////////////////////////////////////////////////////////
1529 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1530 ///
1531 /// This overload infers the types of the columns specified in columnList at runtime and just-in-time compiles the
1532 /// method with these types. See previous overload for more information.
1533 /// \tparam T The type of the object to fill. Automatically deduced.
1534 /// \param[in] model The model to be considered to build the new return value.
1535 /// \param[in] columnList The name of the columns read to fill the object.
1536 /// \return the filled object wrapped in a `RResultPtr`.
1537 ///
1538 /// This overload of `Fill` infers the type of the specified columns at runtime and just-in-time compiles the
1539 /// previous overload. Check the previous overload for more details on `Fill`.
1540 ///
1541 /// ### Example usage:
1542 /// ~~~{.cpp}
1543 /// MyClass obj;
1544 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
1545 /// ~~~
1546 ///
1547 template <typename T>
1548 RResultPtr<T> Fill(T &&model, const ColumnNames_t &bl)
1549 {
1550 auto h = std::make_shared<T>(std::forward<T>(model));
1551 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1552 throw std::runtime_error("The absence of axes limits is not supported yet.");
1553 }
1554 return CreateAction<RDFInternal::ActionTags::Fill, RDFDetail::RInferredType>(bl, h, bl.size());
1555 }
1556
1557 ////////////////////////////////////////////////////////////////////////////
1558 /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1559 ///
1560 /// \tparam V The type of the value column
1561 /// \param[in] value The name of the column with the values to fill the statistics with.
1562 /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1563 ///
1564 /// ### Example usage:
1565 /// ~~~{.cpp}
1566 /// // Deduce column type (this invocation needs jitting internally)
1567 /// auto stats0 = myDf.Stats("values");
1568 /// // Explicit column type
1569 /// auto stats1 = myDf.Stats<float>("values");
1570 /// ~~~
1571 ///
1572 template<typename V = RDFDetail::RInferredType>
1574 {
1575 ColumnNames_t columns;
1576 if (!value.empty()) {
1577 columns.emplace_back(std::string(value));
1578 }
1579 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1580 if (std::is_same<V, RDFDetail::RInferredType>::value) {
1581 return Fill(TStatistic(), validColumnNames);
1582 }
1583 else {
1584 return Fill<V>(TStatistic(), validColumnNames);
1585 }
1586 }
1587
1588 ////////////////////////////////////////////////////////////////////////////
1589 /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1590 ///
1591 /// \tparam V The type of the value column
1592 /// \tparam W The type of the weight column
1593 /// \param[in] value The name of the column with the values to fill the statistics with.
1594 /// \param[in] weight The name of the column with the weights to fill the statistics with.
1595 /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1596 ///
1597 /// ### Example usage:
1598 /// ~~~{.cpp}
1599 /// // Deduce column types (this invocation needs jitting internally)
1600 /// auto stats0 = myDf.Stats("values", "weights");
1601 /// // Explicit column types
1602 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
1603 /// ~~~
1604 ///
1605 template<typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1607 {
1608 ColumnNames_t columns {std::string(value), std::string(weight)};
1609 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
1610 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
1611 const auto validColumnNames = GetValidatedColumnNames(2, columns);
1612 // We have 3 cases:
1613 // 1. Both types are inferred: we use Fill and let the jit kick in.
1614 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
1615 // 3. Both types are explicit: we invoke the fully compiled Fill method.
1616 if (vIsInferred && wIsInferred) {
1617 return Fill(TStatistic(), validColumnNames);
1618 } else if (vIsInferred != wIsInferred) {
1619 std::string error("The ");
1620 error += vIsInferred ? "value " : "weight ";
1621 error += "column type is explicit, while the ";
1622 error += vIsInferred ? "weight " : "value ";
1623 error += " is specified to be inferred. This case is not supported: please specify both types or none.";
1624 throw std::runtime_error(error);
1625 } else {
1626 return Fill<V, W>(TStatistic(), validColumnNames);
1627 }
1628 }
1629
1630 ////////////////////////////////////////////////////////////////////////////
1631 /// \brief Return the minimum of processed column values (*lazy action*)
1632 /// \tparam T The type of the branch/column.
1633 /// \param[in] columnName The name of the branch/column to be treated.
1634 /// \return the minimum value of the selected column wrapped in a `RResultPtr`.
1635 ///
1636 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1637 /// template specialization of this method.
1638 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1639 ///
1640 /// This action is *lazy*: upon invocation of this method the calculation is
1641 /// booked but not executed. See RResultPtr documentation.
1642 ///
1643 /// ### Example usage:
1644 /// ~~~{.cpp}
1645 /// // Deduce column type (this invocation needs jitting internally)
1646 /// auto minVal0 = myDf.Min("values");
1647 /// // Explicit column type
1648 /// auto minVal1 = myDf.Min<double>("values");
1649 /// ~~~
1650 ///
1651 template <typename T = RDFDetail::RInferredType>
1653 {
1654 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1655 using RetType_t = RDFDetail::MinReturnType_t<T>;
1656 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
1657 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV);
1658 }
1659
1660 ////////////////////////////////////////////////////////////////////////////
1661 /// \brief Return the maximum of processed column values (*lazy action*)
1662 /// \tparam T The type of the branch/column.
1663 /// \param[in] columnName The name of the branch/column to be treated.
1664 /// \return the maximum value of the selected column wrapped in a `RResultPtr`.
1665 ///
1666 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1667 /// template specialization of this method.
1668 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1669 ///
1670 /// This action is *lazy*: upon invocation of this method the calculation is
1671 /// booked but not executed. See RResultPtr documentation.
1672 ///
1673 /// ### Example usage:
1674 /// ~~~{.cpp}
1675 /// // Deduce column type (this invocation needs jitting internally)
1676 /// auto maxVal0 = myDf.Max("values");
1677 /// // Explicit column type
1678 /// auto maxVal1 = myDf.Max<double>("values");
1679 /// ~~~
1680 ///
1681 template <typename T = RDFDetail::RInferredType>
1683 {
1684 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1685 using RetType_t = RDFDetail::MaxReturnType_t<T>;
1686 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
1687 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV);
1688 }
1689
1690 ////////////////////////////////////////////////////////////////////////////
1691 /// \brief Return the mean of processed column values (*lazy action*)
1692 /// \tparam T The type of the branch/column.
1693 /// \param[in] columnName The name of the branch/column to be treated.
1694 /// \return the mean value of the selected column wrapped in a `RResultPtr`.
1695 ///
1696 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1697 /// template specialization of this method.
1698 ///
1699 /// This action is *lazy*: upon invocation of this method the calculation is
1700 /// booked but not executed. See RResultPtr documentation.
1701 ///
1702 /// ### Example usage:
1703 /// ~~~{.cpp}
1704 /// // Deduce column type (this invocation needs jitting internally)
1705 /// auto meanVal0 = myDf.Mean("values");
1706 /// // Explicit column type
1707 /// auto meanVal1 = myDf.Mean<double>("values");
1708 /// ~~~
1709 ///
1710 template <typename T = RDFDetail::RInferredType>
1712 {
1713 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1714 auto meanV = std::make_shared<double>(0);
1715 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV);
1716 }
1717
1718 ////////////////////////////////////////////////////////////////////////////
1719 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*)
1720 /// \tparam T The type of the branch/column.
1721 /// \param[in] columnName The name of the branch/column to be treated.
1722 /// \return the standard deviation value of the selected column wrapped in a `RResultPtr`.
1723 ///
1724 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1725 /// template specialization of this method.
1726 ///
1727 /// This action is *lazy*: upon invocation of this method the calculation is
1728 /// booked but not executed. See RResultPtr documentation.
1729 ///
1730 /// ### Example usage:
1731 /// ~~~{.cpp}
1732 /// // Deduce column type (this invocation needs jitting internally)
1733 /// auto stdDev0 = myDf.StdDev("values");
1734 /// // Explicit column type
1735 /// auto stdDev1 = myDf.StdDev<double>("values");
1736 /// ~~~
1737 ///
1738 template <typename T = RDFDetail::RInferredType>
1740 {
1741 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1742 auto stdDeviationV = std::make_shared<double>(0);
1743 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV);
1744 }
1745
1746 // clang-format off
1747 ////////////////////////////////////////////////////////////////////////////
1748 /// \brief Return the sum of processed column values (*lazy action*)
1749 /// \tparam T The type of the branch/column.
1750 /// \param[in] columnName The name of the branch/column.
1751 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1752 /// \return the sum of the selected column wrapped in a `RResultPtr`.
1753 ///
1754 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1755 /// template specialization of this method.
1756 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1757 ///
1758 /// This action is *lazy*: upon invocation of this method the calculation is
1759 /// booked but not executed. See RResultPtr documentation.
1760 ///
1761 /// ### Example usage:
1762 /// ~~~{.cpp}
1763 /// // Deduce column type (this invocation needs jitting internally)
1764 /// auto sum0 = myDf.Sum("values");
1765 /// // Explicit column type
1766 /// auto sum1 = myDf.Sum<double>("values");
1767 /// ~~~
1768 ///
1769 template <typename T = RDFDetail::RInferredType>
1771 Sum(std::string_view columnName = "",
1772 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
1773 {
1774 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1775 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
1776 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV);
1777 }
1778 // clang-format on
1779
1780 ////////////////////////////////////////////////////////////////////////////
1781 /// \brief Gather filtering statistics
1782 /// \return the resulting `RCutFlowReport` instance wrapped in a `RResultPtr`.
1783 ///
1784 /// Calling `Report` on the main `RDataFrame` object gathers stats for
1785 /// all named filters in the call graph. Calling this method on a
1786 /// stored chain state (i.e. a graph node different from the first) gathers
1787 /// the stats for all named filters in the chain section between the original
1788 /// `RDataFrame` and that node (included). Stats are gathered in the same
1789 /// order as the named filters have been added to the graph.
1790 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
1791 /// effects cuts had.
1792 ///
1793 /// This action is *lazy*: upon invocation of
1794 /// this method the calculation is booked but not executed. See RResultPtr
1795 /// documentation.
1796 ///
1797 /// ### Example usage:
1798 /// ~~~{.cpp}
1799 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
1800 /// auto cutReport = filtered.Report();
1801 /// cutReport->Print();
1802 /// ~~~
1803 ///
1805 {
1806 bool returnEmptyReport = false;
1807 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
1808 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
1809 // certainly does not contain named filters.
1810 // The number 4 takes into account the implicit columns for entry and slot number
1811 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
1812 if (std::is_same<Proxied, RLoopManager>::value && fCustomColumns.GetNames().size() > 4)
1813 returnEmptyReport = true;
1814
1815 auto rep = std::make_shared<RCutFlowReport>();
1816 using Helper_t = RDFInternal::ReportHelper<Proxied>;
1818
1819 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
1821
1822 fLoopManager->Book(action.get());
1823 return MakeResultPtr(rep, *fLoopManager, std::move(action));
1824 }
1825
1826 /////////////////////////////////////////////////////////////////////////////
1827 /// \brief Returns the names of the available columns
1828 /// \return the container of column names.
1829 ///
1830 /// This is not an action nor a transformation, just a query to the RDataFrame object.
1831 ///
1832 /// ### Example usage:
1833 /// ~~~{.cpp}
1834 /// auto colNames = d.GetColumnNames();
1835 /// // Print columns' names
1836 /// for (auto &&colName : colNames) std::cout << colName << std::endl;
1837 /// ~~~
1838 ///
1840 {
1841 ColumnNames_t allColumns;
1842
1843 auto addIfNotInternal = [&allColumns](std::string_view colName) {
1844 if (!RDFInternal::IsInternalColumn(colName))
1845 allColumns.emplace_back(colName);
1846 };
1847
1848 auto columnNames = fCustomColumns.GetNames();
1849
1850 std::for_each(columnNames.begin(), columnNames.end(), addIfNotInternal);
1851
1852 auto tree = fLoopManager->GetTree();
1853 if (tree) {
1854 auto branchNames = RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false);
1855 allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
1856 }
1857
1858 if (fDataSource) {
1859 auto &dsColNames = fDataSource->GetColumnNames();
1860 allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end());
1861 }
1862
1863 return allColumns;
1864 }
1865
1866 /////////////////////////////////////////////////////////////////////////////
1867 /// \brief Return the type of a given column as a string.
1868 /// \return the type of the required column.
1869 ///
1870 /// This is not an action nor a transformation, just a query to the RDataFrame object.
1871 ///
1872 /// ### Example usage:
1873 /// ~~~{.cpp}
1874 /// auto colType = d.GetColumnType("columnName");
1875 /// // Print column type
1876 /// std::cout << "Column columnName has type " << colType << std::endl;
1877 /// ~~~
1878 ///
1880 {
1881 const auto &customCols = fCustomColumns.GetNames();
1882 const bool convertVector2RVec = true;
1883 const auto isCustom = std::find(customCols.begin(), customCols.end(), column) != customCols.end();
1884 if (!isCustom) {
1885 return RDFInternal::ColumnName2ColumnTypeName(std::string(column), fLoopManager->GetID(),
1887 convertVector2RVec);
1888 } else {
1889 // must convert the alias "__rdf::column_type" to a readable type
1890 const auto colID = std::to_string(fCustomColumns.GetColumns().at(std::string(column))->GetID());
1891 const auto call = "ROOT::Internal::RDF::TypeID2TypeName(typeid(__rdf" + std::to_string(fLoopManager->GetID()) +
1892 "::" + std::string(column) + colID + "_type))";
1893 fLoopManager->JitDeclarations(); // some type aliases might be needed by the code jitted in the next line
1894 const auto calcRes = RDFInternal::InterpreterCalc(call);
1895 return *reinterpret_cast<std::string *>(calcRes); // copy result to stack
1896 }
1897 }
1898
1899 /// \brief Returns the names of the filters created.
1900 /// \return the container of filters names.
1901 ///
1902 /// If called on a root node, all the filters in the computation graph will
1903 /// be printed. For any other node, only the filters upstream of that node.
1904 /// Filters without a name are printed as "Unnamed Filter"
1905 /// This is not an action nor a transformation, just a query to the RDataFrame object.
1906 ///
1907 /// ### Example usage:
1908 /// ~~~{.cpp}
1909 /// auto filtNames = d.GetFilterNames();
1910 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
1911 /// ~~~
1912 ///
1913 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
1914
1915 /// \brief Returns the names of the defined columns
1916 /// \return the container of the defined column names.
1917 ///
1918 /// This is not an action nor a transformation, just a simple utility to
1919 /// get the columns names that have been defined up to the node.
1920 /// If no custom column has been defined, e.g. on a root node, it returns an
1921 /// empty collection.
1922 ///
1923 /// ### Example usage:
1924 /// ~~~{.cpp}
1925 /// auto defColNames = d.GetDefinedColumnNames();
1926 /// // Print defined columns' names
1927 /// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
1928 /// ~~~
1929 ///
1931 {
1932 ColumnNames_t definedColumns;
1933
1934 auto columns = fCustomColumns.GetColumns();
1935
1936 for (auto column : columns) {
1937 if (!RDFInternal::IsInternalColumn(column.first) && !column.second->IsDataSourceColumn())
1938 definedColumns.emplace_back(column.first);
1939 }
1940
1941 return definedColumns;
1942 }
1943
1944 /// \brief Checks if a column is present in the dataset
1945 /// \return true if the column is available, false otherwise
1946 ///
1947 /// This method checks if a column is part of the input ROOT dataset, has
1948 /// been defined or can be provided by the data source.
1949 ///
1950 /// Example usage:
1951 /// ~~~{.cpp}
1952 /// ROOT::RDataFrame base(1);
1953 /// auto rdf = base.Define("definedColumn", [](){return 0;});
1954 /// rdf.HasColumn("definedColumn"); // true: we defined it
1955 /// rdf.HasColumn("rdfentry_"); // true: it's always there
1956 /// rdf.HasColumn("foo"); // false: it is not there
1957 /// ~~~
1959 {
1960 if (fCustomColumns.HasName(columnName))
1961 return true;
1962
1963 if (auto tree = fLoopManager->GetTree()) {
1964 const auto &branchNames = fLoopManager->GetBranchNames();
1965 const auto branchNamesEnd = branchNames.end();
1966 if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
1967 return true;
1968 }
1969
1970 if (fDataSource && fDataSource->HasColumn(columnName))
1971 return true;
1972
1973 return false;
1974 }
1975
1976 /// \brief Gets the number of data processing slots
1977 /// \return The number of data processing slots used by this RDataFrame instance
1978 ///
1979 /// This method returns the number of data processing slots used by this RDataFrame
1980 /// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
1981 ///
1982 /// Example usage:
1983 /// ~~~{.cpp}
1984 /// ROOT::EnableImplicitMT(6)
1985 /// ROOT::RDataFrame df(1);
1986 /// std::cout << df.GetNSlots() << std::endl; // prints "6"
1987 /// ~~~
1988 unsigned int GetNSlots() const { return fLoopManager->GetNSlots(); }
1989
1990 // clang-format off
1991 ////////////////////////////////////////////////////////////////////////////
1992 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
1993 /// \tparam F The type of the aggregator callable. Automatically deduced.
1994 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
1995 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1996 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
1997 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
1998 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
1999 /// \param[in] aggIdentity The aggregator variable of each thread is initialised to this value (or is default-constructed if the parameter is omitted)
2000 /// \return the result of the aggregation wrapped in a `RResultPtr`.
2001 ///
2002 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2003 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2004 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2005 /// the value of the column columnName.
2006 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2007 /// Otherwise the signature of aggregator must be `void(U&,T)`.
2008 ///
2009 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2010 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2011 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2012 ///
2013 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2014 ///
2015 /// Example usage:
2016 /// ~~~{.cpp}
2017 /// auto aggregator = [](double acc, double x) { return acc * x; };
2018 /// ROOT::EnableImplicitMT();
2019 /// // If multithread is enabled, the aggregator function will be called by more threads
2020 /// // and will produce a vector of partial accumulators.
2021 /// // The merger function performs the final aggregation of these partial results.
2022 /// auto merger = [](std::vector<double> &accumulators) {
2023 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2024 /// accumulators[0] *= accumulators[i];
2025 /// }
2026 /// };
2027 ///
2028 /// // The accumulator is initialized at this value by every thread.
2029 /// double initValue = 1.;
2030 ///
2031 /// // Multiplies all elements of the column "x"
2032 /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2033 /// ~~~
2034 // clang-format on
2035 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2036 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2037 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2038 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2039 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2040 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2041 {
2042 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2043 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2044 constexpr auto nColumns = ArgTypes::list_size;
2045
2046 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2047
2048 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ArgTypes());
2049
2050 auto accObjPtr = std::make_shared<U>(aggIdentity);
2051 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2052 using Action_t = typename RDFInternal::RAction<Helper_t, Proxied>;
2053 auto action = std::make_unique<Action_t>(
2054 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2055 fProxiedPtr, std::move(newColumns));
2056 fLoopManager->Book(action.get());
2057 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2058 }
2059
2060 // clang-format off
2061 ////////////////////////////////////////////////////////////////////////////
2062 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
2063 /// \tparam F The type of the aggregator callable. Automatically deduced.
2064 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2065 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2066 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U,T)`, where T is the type of the column, U is the type of the aggregator variable
2067 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2068 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2069 /// \return the result of the aggregation wrapped in a `RResultPtr`.
2070 ///
2071 /// See previous Aggregate overload for more information.
2072 // clang-format on
2073 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2074 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2075 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2076 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2077 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2078 {
2079 static_assert(
2080 std::is_default_constructible<U>::value,
2081 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2082 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2083 }
2084
2085 // clang-format off
2086 ////////////////////////////////////////////////////////////////////////////
2087 /// \brief Book execution of a custom action using a user-defined helper object.
2088 /// \tparam ColumnTypes List of types of columns used by this action.
2089 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2090 /// \param[in] helper The Action Helper to be scheduled.
2091 /// \param[in] columns The names of the columns on which the helper acts.
2092 /// \return the result of the helper wrapped in a `RResultPtr`.
2093 ///
2094 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2095 /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2096 /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2097 ///
2098 /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2099 /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2100 /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2101 /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2102 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2103 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2104 /// the requested columns for the particular entry being processed.
2105 /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2106 /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2107 /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2108 /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations. Can be no-op.
2109 /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2110 /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2111 /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2112 /// method concurrently, but will always pass different 'slot' numbers.
2113 /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2114 /// Result_t). The RResultPtr returned by Book will point to this object.
2115 ///
2116 /// See ActionHelpers.hxx for the helpers used by standard RDF actions.
2117 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2118 // clang-format on
2119 template <typename... ColumnTypes, typename Helper>
2120 RResultPtr<typename Helper::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {})
2121 {
2122 constexpr auto nColumns = sizeof...(ColumnTypes);
2123 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columns.size());
2124
2125 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2126
2127 // TODO add more static sanity checks on Helper
2128 using AH = RDFDetail::RActionImpl<Helper>;
2129 static_assert(std::is_base_of<AH, Helper>::value && std::is_convertible<Helper *, AH *>::value,
2130 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2131
2132 using Action_t = typename RDFInternal::RAction<Helper, Proxied, TTraits::TypeList<ColumnTypes...>>;
2133 auto resPtr = helper.GetResultPtr();
2134
2135 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2137
2138 auto action = std::make_unique<Action_t>(Helper(std::forward<Helper>(helper)), validColumnNames, fProxiedPtr,
2140 fLoopManager->Book(action.get());
2141 return MakeResultPtr(resPtr, *fLoopManager, std::move(action));
2142 }
2143
2144 ////////////////////////////////////////////////////////////////////////////
2145 /// \brief Provides a representation of the columns in the dataset
2146 /// \tparam ColumnTypes variadic list of branch/column types.
2147 /// \param[in] columnList Names of the columns to be displayed.
2148 /// \param[in] rows Number of events for each column to be displayed.
2149 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2150 ///
2151 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2152 /// form. RDisplay will either print on the standard output a summarized version through `Print()` or will return a
2153 /// complete version through `AsString()`.
2154 ///
2155 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2156 ///
2157 /// Example usage:
2158 /// ~~~{.cpp}
2159 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2160 /// auto d1 = rdf.Display("");
2161 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2162 /// auto d2 = d.Display({"x", "y"}, 128);
2163 /// // Printing the short representations, the event loop will run
2164 /// d1->Print();
2165 /// d2->Print();
2166 /// ~~~
2167 template <typename... ColumnTypes>
2168 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2169 {
2170 CheckIMTDisabled("Display");
2171
2172 auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2173 return CreateAction<RDFInternal::ActionTags::Display, ColumnTypes...>(columnList, displayer);
2174 }
2175
2176 ////////////////////////////////////////////////////////////////////////////
2177 /// \brief Provides a representation of the columns in the dataset
2178 /// \param[in] columnList Names of the columns to be displayed.
2179 /// \param[in] rows Number of events for each column to be displayed.
2180 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2181 ///
2182 /// This overload automatically infers the column types.
2183 /// See the previous overloads for further details.
2184 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2185 {
2186 CheckIMTDisabled("Display");
2187 auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2188 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(columnList, displayer,
2189 columnList.size());
2190 }
2191
2192 ////////////////////////////////////////////////////////////////////////////
2193 /// \brief Provides a representation of the columns in the dataset
2194 /// \param[in] columnNameRegexp A regular expression to select the columns.
2195 /// \param[in] nRows Number of events for each column to be displayed.
2196 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2197 ///
2198 /// The existing columns are matched against the regular expression. If the string provided
2199 /// is empty, all columns are selected.
2200 /// See the previous overloads for further details.
2201 RResultPtr<RDisplay> Display(std::string_view columnNameRegexp = "", const int &nRows = 5)
2202 {
2204 columnNameRegexp, "Display");
2205 return Display(selectedColumns, nRows);
2206 }
2207
2208 ////////////////////////////////////////////////////////////////////////////
2209 /// \brief Provides a representation of the columns in the dataset
2210 /// \param[in] columnList Names of the columns to be displayed.
2211 /// \param[in] nRows Number of events for each column to be displayed.
2212 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2213 ///
2214 /// See the previous overloads for further details.
2215 RResultPtr<RDisplay> Display(std::initializer_list<std::string> columnList, const int &nRows = 5)
2216 {
2217 ColumnNames_t selectedColumns(columnList);
2218 return Display(selectedColumns, nRows);
2219 }
2220
2221private:
2223 {
2225
2226 // Entry number column
2227 const auto entryColName = "rdfentry_";
2228 auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
2229 using NewColEntry_t =
2230 RDFDetail::RCustomColumn<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
2231
2232 auto entryColumn = std::make_shared<NewColEntry_t>(fLoopManager, entryColName, std::move(entryColGen),
2233 ColumnNames_t{}, fLoopManager->GetNSlots(), newCols);
2234 newCols.AddName(entryColName);
2235 newCols.AddColumn(entryColumn, entryColName);
2236
2237 fLoopManager->RegisterCustomColumn(entryColumn.get());
2238
2239 // Declare return type to the interpreter, for future use by jitted actions
2240 auto retTypeDeclaration = "namespace __rdf" + std::to_string(fLoopManager->GetID()) + " { using " + entryColName +
2241 std::to_string(entryColumn->GetID()) + "_type = ULong64_t; }";
2242 fLoopManager->ToJitDeclare(retTypeDeclaration);
2243
2244 // Slot number column
2245 const auto slotColName = "rdfslot_";
2246 auto slotColGen = [](unsigned int slot) { return slot; };
2247 using NewColSlot_t = RDFDetail::RCustomColumn<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
2248
2249 auto slotColumn = std::make_shared<NewColSlot_t>(fLoopManager, slotColName, std::move(slotColGen),
2250 ColumnNames_t{}, fLoopManager->GetNSlots(), newCols);
2251
2252 newCols.AddName(slotColName);
2253 newCols.AddColumn(slotColumn, slotColName);
2254
2255 fLoopManager->RegisterCustomColumn(slotColumn.get());
2256
2257 fCustomColumns = std::move(newCols);
2258
2259 // Declare return type to the interpreter, for future use by jitted actions
2260 retTypeDeclaration = "namespace __rdf" + std::to_string(fLoopManager->GetID()) + " { using " + slotColName +
2261 std::to_string(slotColumn->GetID()) + "_type = unsigned int; }";
2262 fLoopManager->ToJitDeclare(retTypeDeclaration);
2263
2264 fLoopManager->AddColumnAlias("tdfentry_", entryColName);
2265 fCustomColumns.AddName("tdfentry_");
2266 fLoopManager->AddColumnAlias("tdfslot_", slotColName);
2267 fCustomColumns.AddName("tdfslot_");
2268 }
2269
2270 std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
2271 {
2272 std::vector<std::string> types;
2273
2274 for (auto column : columnList) {
2275 types.push_back(GetColumnType(column));
2276 }
2277 return types;
2278 }
2279
2281 {
2283 std::string error(callerName);
2284 error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
2285 throw std::runtime_error(error);
2286 }
2287 }
2288
2289 // Type was specified by the user, no need to infer it
2290 template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2291 typename std::enable_if<!RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2292 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r)
2293 {
2294 constexpr auto nColumns = sizeof...(BranchTypes);
2295
2296 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2297
2298 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2300
2301 const auto nSlots = fLoopManager->GetNSlots();
2302
2303 auto action = RDFInternal::BuildAction<BranchTypes...>(validColumnNames, r, nSlots, fProxiedPtr, ActionTag{},
2304 std::move(newColumns));
2305 fLoopManager->Book(action.get());
2306 return MakeResultPtr(r, *fLoopManager, std::move(action));
2307 }
2308
2309 // User did not specify type, do type inference
2310 // This version of CreateAction has a `nColumns` optional argument. If present, the number of required columns for
2311 // this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(BranchTypes)
2312 template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2313 typename std::enable_if<RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2315 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r, const int nColumns = -1)
2316 {
2317 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(BranchTypes));
2318
2319 const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
2320 const unsigned int nSlots = fLoopManager->GetNSlots();
2321
2322 auto tree = fLoopManager->GetTree();
2323 auto rOnHeap = RDFInternal::MakeSharedOnHeap(r);
2324
2325 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
2326 using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
2327 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
2328
2329 auto jittedActionOnHeap =
2330 RDFInternal::MakeSharedOnHeap(std::make_shared<RDFInternal::RJittedAction>(*fLoopManager));
2331
2332 auto toJit = RDFInternal::JitBuildAction(
2333 validColumnNames, upcastNodeOnHeap, typeid(std::shared_ptr<ActionResultType>), typeid(ActionTag), rOnHeap,
2334 tree, nSlots, fCustomColumns, fDataSource, jittedActionOnHeap, fLoopManager->GetID());
2335 fLoopManager->Book(jittedActionOnHeap->get());
2336 fLoopManager->ToJitExec(toJit);
2337 return MakeResultPtr(r, *fLoopManager, *jittedActionOnHeap);
2338 }
2339
2340 template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
2341 typename std::enable_if<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>::type
2342 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
2343 {
2347
2348 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
2349 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
2350 std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::Slot>::value, ArgTypes_t>::type;
2351 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
2352 std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::SlotAndEntry>::value, ColTypesTmp_t>::type;
2353
2354 constexpr auto nColumns = ColTypes_t::list_size;
2355
2356 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2357
2358 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
2359
2361 RDFInternal::RBookedCustomColumns newCols(newColumns);
2362 auto newColumn = std::make_shared<NewCol_t>(fLoopManager, name, std::forward<F>(expression), validColumnNames,
2363 fLoopManager->GetNSlots(), newCols);
2364
2365 // Declare return type to the interpreter, for future use by jitted actions
2366 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2367 if (retTypeName.empty()) {
2368 // The type is not known to the interpreter.
2369 // Forward-declare it as void + helpful comment, so that if this Define'd quantity is
2370 // ever used by jitted code users will have some way to know what went wrong
2371 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2372 retTypeName = "void /* The type of column \"" + std::string(name) + "\" (" + demangledType +
2373 ") is not known to the interpreter. */";
2374 }
2375 const auto retTypeDeclaration = "namespace __rdf" + std::to_string(fLoopManager->GetID()) +
2376 " { " + +" using " + std::string(name) + std::to_string(newColumn->GetID()) +
2377 "_type = " + retTypeName + "; }";
2378 fLoopManager->ToJitDeclare(retTypeDeclaration);
2379
2380 fLoopManager->RegisterCustomColumn(newColumn.get());
2381 newCols.AddName(name);
2382 newCols.AddColumn(newColumn, name);
2383
2384 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
2385
2386 return newInterface;
2387 }
2388
2389 // This overload is chosen when the callable passed to Define or DefineSlot returns a type that is not default-constructible (e.g. void).
2390 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
2391 // this way compilation of `Define` has no way to continue after throwing the error.
2392 template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
2393 bool IsFStringConv = std::is_convertible<F, std::string>::value,
2394 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
2395 typename std::enable_if<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>::type
2397 {
2398 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
2399 "Error in `Define`: type returned by expression is not default-constructible");
2400 return *this; // never reached
2401 }
2402
2403 ////////////////////////////////////////////////////////////////////////////
2404 /// \brief Implementation of snapshot
2405 /// \param[in] treename The name of the TTree
2406 /// \param[in] filename The name of the TFile
2407 /// \param[in] columnList The list of names of the branches to be written
2408 /// The implementation exploits Foreach. The association of the addresses to
2409 /// the branches takes place at the first event. This is possible because
2410 /// since there are no copies, the address of the value passed by reference
2411 /// is the address pointing to the storage of the read/created object in/by
2412 /// the TTreeReaderValue/TemporaryBranch
2413 template <typename... ColumnTypes>
2415 const ColumnNames_t &columnList, const RSnapshotOptions &options)
2416 {
2417 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnList.size());
2418
2419 const auto validCols = GetValidatedColumnNames(columnList.size(), columnList);
2420
2421 auto newColumns = CheckAndFillDSColumns(validCols, std::index_sequence_for<ColumnTypes...>(),
2423
2424 const std::string fullTreename(treename);
2425 // split name into directory and treename if needed
2426 const auto lastSlash = treename.rfind('/');
2427 std::string_view dirname = "";
2428 if (std::string_view::npos != lastSlash) {
2429 dirname = treename.substr(0, lastSlash);
2430 treename = treename.substr(lastSlash + 1, treename.size());
2431 }
2432
2433 // add action node to functional graph and run event loop
2434 std::unique_ptr<RDFInternal::RActionBase> actionPtr;
2436 // single-thread snapshot
2437 using Helper_t = RDFInternal::SnapshotHelper<ColumnTypes...>;
2439 actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, validCols, columnList, options), validCols,
2440 fProxiedPtr, std::move(newColumns)));
2441 } else {
2442 // multi-thread snapshot
2443 using Helper_t = RDFInternal::SnapshotHelperMT<ColumnTypes...>;
2445 actionPtr.reset(new Action_t(
2446 Helper_t(fLoopManager->GetNSlots(), filename, dirname, treename, validCols, columnList, options), validCols,
2447 fProxiedPtr, std::move(newColumns)));
2448 }
2449
2450 fLoopManager->Book(actionPtr.get());
2451
2452 return RDFInternal::CreateSnaphotRDF(validCols, fullTreename, filename, options.fLazy, *fLoopManager,
2453 std::move(actionPtr));
2454 }
2455
2456 ////////////////////////////////////////////////////////////////////////////
2457 /// \brief Implementation of cache
2458 template <typename... BranchTypes, std::size_t... S>
2459 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...> s)
2460 {
2461 // Check at compile time that the columns types are copy constructible
2462 constexpr bool areCopyConstructible =
2463 RDFInternal::TEvalAnd<std::is_copy_constructible<BranchTypes>::value...>::value;
2464 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
2465
2466 // We share bits and pieces with snapshot. De facto this is a snapshot
2467 // in memory!
2468 RDFInternal::CheckTypesAndPars(sizeof...(BranchTypes), columnList.size());
2469
2470 auto colHolders = std::make_tuple(Take<BranchTypes>(columnList[S])...);
2471 auto ds = std::make_unique<RLazyDS<BranchTypes...>>(std::make_pair(columnList[S], std::get<S>(colHolders))...);
2472
2473 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnList));
2474
2475 (void)s; // Prevents unused warning
2476
2477 return cachedRDF;
2478 }
2479
2480protected:
2481 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
2483 : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fCustomColumns(columns)
2484 {
2485 }
2486
2488
2489 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
2490
2491 /// Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,
2492 /// which is expensive in terms of runtime, is called at most once.
2493 ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
2494 {
2496 fDataSource);
2497 }
2498
2499 template <typename... ColumnTypes, std::size_t... S>
2502 {
2503 return fDataSource
2504 ? RDFInternal::AddDSColumns(*fLoopManager, validCols, fCustomColumns, *fDataSource,
2505 fLoopManager->GetNSlots(), std::index_sequence_for<ColumnTypes...>(),
2508 }
2509};
2510
2511} // end NS RDF
2512
2513} // namespace ROOT
2514
2515#endif // ROOT_RDF_TINTERFACE
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
unsigned int UInt_t
Definition: RtypesCore.h:42
unsigned long long ULong64_t
Definition: RtypesCore.h:70
const Int_t kError
Definition: TError.h:39
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
typedef void((*Func_t)())
unsigned int GetID() const
Return the unique identifier of this RCustomColumnBase.
The head node of a RDF computation graph.
const std::map< std::string, std::string > & GetAliasMap() const
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void RegisterCustomColumn(RCustomColumnBase *column)
void ToJitDeclare(const std::string &s)
void ToJitExec(const std::string &s)
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void JitDeclarations()
Declare to the interpreter type aliases and other entities required by RDF jitted nodes.
void AddColumnAlias(const std::string &alias, const std::string &colName)
RDataSource * GetDataSource() const
unsigned int GetNSlots() const
void Book(RDFInternal::RActionBase *actionPtr)
Helper class that provides the operation graph nodes.
An action node in a RDF computation graph.
Definition: RAction.hxx:215
Encapsulates the columns defined by the user.
void AddColumn(const std::shared_ptr< RDFDetail::RCustomColumnBase > &column, std::string_view name)
Internally it recreates the map with the new column, and swaps with the old one.
ColumnNames_t GetNames() const
Returns the list of the names of the defined columns.
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
const RCustomColumnBasePtrMap_t & GetColumns() const
Returns the list of the pointers to the defined columns.
void AddName(std::string_view name)
Internally it recreates the map with the new column name, and swaps with the old one.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
virtual bool HasColumn(std::string_view) const =0
Checks if the dataset has a certain column.
The public interface to the RDataFrame federation of classes.
Definition: RInterface.hxx:89
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action)
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RBookedCustomColumns &columns, RDataSource *ds)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:546
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action)
RLoopManager * GetLoopManager() const
RResultPtr<::TGraph > Graph(std::string_view v1Name="", std::string_view v2Name="")
Fill and return a graph (lazy action)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot.
Definition: RInterface.hxx:324
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a RInterface<RLoopManager>
Definition: RInterface.hxx:126
RResultPtr< T > Fill(T &&model, const ColumnNames_t &bl)
Return an object of type T on which T::Fill will be called once per event (lazy action)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const int nColumns=-1)
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action)
Definition: RInterface.hxx:782
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:623
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column.
Definition: RInterface.hxx:295
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action)
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
Definition: RInterface.hxx:105
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action)
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,...
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
RDFInternal::RBookedCustomColumns fCustomColumns
Contains the custom columns defined up to this node.
Definition: RInterface.hxx:108
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
Definition: RInterface.hxx:228
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:572
std::enable_if<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view, F, const ColumnNames_t &)
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allows referring to a column by a different name.
Definition: RInterface.hxx:407
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:611
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
Definition: RInterface.hxx:672
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", const int &nRows=5)
Provides a representation of the columns in the dataset.
RLoopManager * fLoopManager
Definition: RInterface.hxx:103
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
Definition: RInterface.hxx:96
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action)
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action)
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action)
Definition: RInterface.hxx:873
RResultPtr< T > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action)
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Creates a custom column.
Definition: RInterface.hxx:371
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
Definition: RInterface.hxx:101
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action)
Definition: RInterface.hxx:985
RDFInternal::RBookedCustomColumns CheckAndFillDSColumns(ColumnNames_t validCols, std::index_sequence< S... >, TTraits::TypeList< ColumnTypes... >)
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
Implementation of snapshot.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
Definition: RInterface.hxx:732
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default)
Definition: RInterface.hxx:906
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
Definition: RInterface.hxx:686
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action)
RResultPtr< typename Helper::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:465
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:483
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot and the current entry.
Definition: RInterface.hxx:354
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action)
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:832
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action)
Definition: RInterface.hxx:752
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:248
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
std::enable_if< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:187
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:855
void CheckIMTDisabled(std::string_view callerName)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... > s)
Implementation of cache.
RDFDetail::ColumnNames_t ColumnNames_t
Definition: RInterface.hxx:91
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
Definition: RInterface.hxx:212
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end)
Definition: RInterface.hxx:710
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action)
Definition: RInterface.hxx:948
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action)
An RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:72
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:42
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Statistical variable, defined by its mean and variance (RMS).
Definition: TStatistic.h:35
basic_string_view< char > string_view
#define F(x, y, z)
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create an RResultPtr and set its pointer to the corresponding RAction. This overload is invoked by non-...
Definition: RResultPtr.hxx:346
ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
HeadNode_t CreateSnaphotRDF(const ColumnNames_t &validCols, std::string_view treeName, std::string_view fileName, bool isLazy, RLoopManager &loopManager, std::unique_ptr< RDFInternal::RActionBase > actionPtr)
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition: RDFUtils.cxx:83
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
ColumnNames_t ConvertRegexToColumns(const RDFInternal::RBookedCustomColumns &customColumns, TTree *tree, ROOT::RDF::RDataSource *dataSource, std::string_view columnNameRegexp, std::string_view callerName)
std::string PrettyPrintAddr(const void *const addr)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::string JitBuildAction(const ColumnNames_t &bl, void *prevNode, const std::type_info &art, const std::type_info &at, void *rOnHeap, TTree *tree, const unsigned int nSlots, const RDFInternal::RBookedCustomColumns &customCols, RDataSource *ds, std::shared_ptr< RJittedAction > *jittedActionOnHeap, unsigned int namespaceID)
bool IsInternalColumn(std::string_view colName)
Long64_t InterpreterCalc(const std::string &code, const std::string &context)
Definition: RDFUtils.cxx:297
std::string ColumnName2ColumnTypeName(const std::string &colName, unsigned int namespaceID, TTree *tree, RDataSource *ds, bool isCustomColumn, bool vector2rvec, unsigned int customColID)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:197
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &validCustomColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
void BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const std::shared_ptr< RJittedCustomColumn > &jittedCustomColumn, const RDFInternal::RBookedCustomColumns &customCols, const ColumnNames_t &branches)
void BookFilterJit(RJittedFilter *jittedFilter, void *prevNodeOnHeap, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const RDFInternal::RBookedCustomColumns &customCols, TTree *tree, RDataSource *ds, unsigned int namespaceID)
void CheckCustomColumn(std::string_view definedCol, TTree *treePtr, const ColumnNames_t &customCols, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &dataSourceColumns)
double T(double x)
Definition: ChebyshevPol.h:34
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
ROOT type_traits extensions.
Definition: TypeTraits.hxx:23
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:579
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:610
ROOT::Detail::RDF::ColumnNames_t ColumnNames_t
Definition: RDataFrame.cxx:788
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition: TROOT.cxx:596
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1194
RooArgSet S(const RooAbsArg &v1)
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle the type id name in a portable way.
static constexpr double s
Definition: graph.py:1
Definition: tree.py:1
A collection of options to steer the creation of the dataset on file.
bool fLazy
Delay the snapshot of the dataset.
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:27
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores the parameters of a TH2D.
Definition: HistoModels.hxx:45
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores the parameters of a TH3D.
Definition: HistoModels.hxx:70
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores the parameters of a TProfile.
Definition: HistoModels.hxx:99
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores the parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:27