Logo ROOT   6.16/01
Reference Guide
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/RDataSource.hxx"
19#include "ROOT/RDF/RRange.hxx"
20#include "ROOT/RDF/Utils.hxx"
23#include "ROOT/RResultPtr.hxx"
25#include "ROOT/RStringView.hxx"
26#include "ROOT/TypeTraits.hxx"
27#include "RtypesCore.h" // for ULong64_t
28#include "TH1.h" // For Histo actions
29#include "TH2.h" // For Histo actions
30#include "TH3.h" // For Histo actions
31#include "TInterpreter.h"
32#include "TProfile.h"
33#include "TProfile2D.h"
34#include "TROOT.h" // IsImplicitMTEnabled
35
36#include <algorithm>
37#include <cstddef>
38#include <initializer_list>
39#include <limits>
40#include <memory>
41#include <sstream>
42#include <stdexcept>
43#include <string>
44#include <type_traits> // is_same, enable_if
45#include <typeinfo>
46#include <vector>
47
48class TGraph;
49
50// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
51namespace ROOT {
52class RDataFrame;
53namespace Internal {
54namespace RDF {
56}
57}
58}
59namespace cling {
60std::string printValue(ROOT::RDataFrame *tdf);
61}
62
63namespace ROOT {
64namespace RDF {
67namespace TTraits = ROOT::TypeTraits;
68
69template <typename Proxied, typename DataSource>
70class RInterface;
71
72using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
73
74// clang-format off
75/**
76 * \class ROOT::RDF::RInterface
77 * \ingroup dataframe
78 * \brief The public interface to the RDataFrame federation of classes
79 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
80 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
81 *
82 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
83 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
84 */
85// clang-format on
86template <typename Proxied, typename DataSource = void>
88 using DS_t = DataSource;
93 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
96 std::string_view columnNameRegexp,
97 std::string_view callerName);
100 const std::string &fullTreeName,
101 const std::string &fileName,
102 bool isLazy,
103 RLoopManager &loopManager,
104 std::unique_ptr<RDFInternal::RActionBase> actionPtr);
105
106 template <typename T, typename W>
107 friend class RInterface;
108
109 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
110 ///< The RLoopManager at the root of this computation graph. Never null.
112 /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
114
115 /// Contains the custom columns defined up to this node.
117
118public:
119 ////////////////////////////////////////////////////////////////////////////
120 /// \brief Copy-assignment operator for RInterface.
121 RInterface &operator=(const RInterface &) = default;
122
123 ////////////////////////////////////////////////////////////////////////////
124 /// \brief Copy-ctor for RInterface.
125 RInterface(const RInterface &) = default;
126
127 ////////////////////////////////////////////////////////////////////////////
128 /// \brief Move-ctor for RInterface.
129 RInterface(RInterface &&) = default;
130
131 ////////////////////////////////////////////////////////////////////////////
132 /// \brief Only enabled when building a RInterface<RLoopManager>
133 template <typename T = Proxied, typename std::enable_if<std::is_same<T, RLoopManager>::value, int>::type = 0>
134 RInterface(const std::shared_ptr<Proxied> &proxied)
135 : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource())
136 {
138 }
139
140 ////////////////////////////////////////////////////////////////////////////
141 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
142 /// Different RDataFrame methods return different C++ types. All nodes, however,
143 /// can be cast to this common type at the cost of a small performance penalty.
144 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
145 /// around via (non-template, C++11) helper functions.
146 /// Example usage:
147 /// ~~~{.cpp}
148 /// // a function that conditionally adds a Range to a RDataFrame node.
149 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
150 /// {
151 /// return mustAddRange ? df.Range(1) : df;
152 /// }
153 /// // use as :
154 /// ROOT::RDataFrame df(10);
155 /// auto maybeRanged = MaybeAddRange(df, true);
156 /// ~~~
157 /// Note that it is not a problem to pass RNode's by value.
158 operator RNode() const
159 {
160 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fCustomColumns,
162 }
163
164 ////////////////////////////////////////////////////////////////////////////
165 /// \brief Append a filter to the call graph.
166 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
167 /// signalling whether the event has passed the selection (true) or not (false).
168 /// \param[in] columns Names of the columns/branches in input to the filter function.
169 /// \param[in] name Optional name of this filter. See `Report`.
170 /// \return the filter node of the computation graph.
171 ///
172 /// Append a filter node at the point of the call graph corresponding to the
173 /// object this method is called on.
174 /// The callable `f` should not have side-effects (e.g. modification of an
175 /// external or static variable) to ensure correct results when implicit
176 /// multi-threading is active.
177 ///
178 /// RDataFrame only evaluates filters when necessary: if multiple filters
179 /// are chained one after another, they are executed in order and the first
180 /// one returning false causes the event to be discarded.
181 /// Even if multiple actions or transformations depend on the same filter,
182 /// it is executed once per entry. If its result is requested more than
183 /// once, the cached result is served.
184 ///
185 /// ### Example usage:
186 /// ~~~{.cpp}
187 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
188 /// auto filtered = df.Filter(myCut, {"x", "y"});
189 //
190 /// // String: it must contain valid C++ except that column names can be used instead of variable names
191 /// auto filtered = df.Filter("x*y > 0");
192 /// ~~~
193 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
195 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
196 {
197 RDFInternal::CheckFilter(f);
198 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
199 constexpr auto nColumns = ColTypes_t::list_size;
200 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
201 const auto newColumns =
202 CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
203
205
206 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, newColumns, name);
207 fLoopManager->Book(filterPtr.get());
208 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, newColumns, fDataSource);
209 }
210
211 ////////////////////////////////////////////////////////////////////////////
212 /// \brief Append a filter to the call graph.
213 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
214 /// signalling whether the event has passed the selection (true) or not (false).
215 /// \param[in] name Optional name of this filter. See `Report`.
216 /// \return the filter node of the computation graph.
217 ///
218 /// Refer to the first overload of this method for the full documentation.
219 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
221 {
222 // The sfinae is there in order to pick up the overloaded method which accepts two strings
223 // rather than this template method.
224 return Filter(f, {}, name);
225 }
226
227 ////////////////////////////////////////////////////////////////////////////
228 /// \brief Append a filter to the call graph.
229 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
230 /// signalling whether the event has passed the selection (true) or not (false).
231 /// \param[in] columns Names of the columns/branches in input to the filter function.
232 /// \return the filter node of the computation graph.
233 ///
234 /// Refer to the first overload of this method for the full documentation.
235 template <typename F>
236 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
237 {
238 return Filter(f, ColumnNames_t{columns});
239 }
240
241 ////////////////////////////////////////////////////////////////////////////
242 /// \brief Append a filter to the call graph.
243 /// \param[in] expression The filter expression in C++
244 /// \param[in] name Optional name of this filter. See `Report`.
245 /// \return the filter node of the computation graph.
246 ///
247 /// The expression is just-in-time compiled and used to filter entries. It must
248 /// be valid C++ syntax in which variable names are substituted with the names
249 /// of branches/columns.
250 ///
251 /// Refer to the first overload of this method for the full documentation.
253 {
254 // deleted by the jitted call to JitFilterHelper
255 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
256 using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
257 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
258 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(fLoopManager, name);
259
260 RDFInternal::BookFilterJit(jittedFilter.get(), upcastNodeOnHeap, name, expression, fLoopManager->GetAliasMap(),
263
264 fLoopManager->Book(jittedFilter.get());
267 }
268
269 // clang-format off
270 ////////////////////////////////////////////////////////////////////////////
271 /// \brief Creates a custom column
272 /// \param[in] name The name of the custom column.
273 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
274 /// \param[in] columns Names of the columns/branches in input to the producer function.
275 /// \return the first node of the computation graph for which the new quantity is defined.
276 ///
277 /// Create a custom column that will be visible from all subsequent nodes
278 /// of the functional chain. The `expression` is only evaluated for entries that pass
279 /// all the preceding filters.
280 /// A new variable is created called `name`, accessible as if it was contained
281 /// in the dataset from subsequent transformations/actions.
282 ///
283 /// Use cases include:
284 /// * caching the results of complex calculations for easy and efficient multiple access
285 /// * extraction of quantities of interest from complex objects
286 ///
287 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
288 ///
289 /// ### Example usage:
290 /// ~~~{.cpp}
291 /// // assuming a function with signature:
292 /// double myComplexCalculation(const RVec<float> &muon_pts);
293 /// // we can pass it directly to Define
294 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
295 /// // alternatively, we can pass the body of the function as a string, as in Filter:
296 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
297 /// ~~~
298 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
300 {
301 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns);
302 }
303 // clang-format on
304
305 // clang-format off
306 ////////////////////////////////////////////////////////////////////////////
307 /// \brief Creates a custom column with a value dependent on the processing slot.
308 /// \param[in] name The name of the custom column.
309 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
310 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
311 /// \return the first node of the computation graph for which the new quantity is defined.
312 ///
313 /// This alternative implementation of `Define` is meant as a helper in writing thread-safe custom columns.
314 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
315 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
316 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
317 /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1.
318 ///
319 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
320 /// ~~~{.cpp}
321 /// int function(unsigned int, double, double);
322 /// df.Define("x", function, {"tdfslot_", "column1", "column2"})
323 /// df.DefineSlot("x", function, {"column1", "column2"})
324 /// ~~~
325 ///
326 /// See Define for more information.
327 template <typename F>
329 {
330 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns);
331 }
332 // clang-format on
333
334 // clang-format off
335 ////////////////////////////////////////////////////////////////////////////
336 /// \brief Creates a custom column with a value dependent on the processing slot and the current entry.
337 /// \param[in] name The name of the custom column.
338 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
339 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
340 /// \return the first node of the computation graph for which the new quantity is defined.
341 ///
342 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
343 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
344 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
345 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
346 /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1. The second parameter
347 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
348 ///
349 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
350 /// ~~~{.cpp}
351 /// int function(unsigned int, ULong64_t, double, double);
352 /// Define("x", function, {"tdfslot_", "tdfentry_", "column1", "column2"})
353 /// DefineSlotEntry("x", function, {"column1", "column2"})
354 /// ~~~
355 ///
356 /// See Define for more information.
357 template <typename F>
359 {
360 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns);
361 }
362 // clang-format on
363
364 ////////////////////////////////////////////////////////////////////////////
365 /// \brief Creates a custom column
366 /// \param[in] name The name of the custom column.
367 /// \param[in] expression An expression in C++ which represents the temporary value
368 /// \return the first node of the computation graph for which the new quantity is defined.
369 ///
370 /// The expression is just-in-time compiled and used to produce the column entries.
371 /// It must be valid C++ syntax in which variable names are substituted with the names
372 /// of branches/columns.
373 ///
374 /// Refer to the first overload of this method for the full documentation.
376 {
377 // this check must be done before jitting lest we throw exceptions in jitted code
381
382 auto jittedCustomColumn =
383 std::make_shared<RDFDetail::RJittedCustomColumn>(fLoopManager, name, fLoopManager->GetNSlots());
384
385 RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, jittedCustomColumn, fCustomColumns,
387
389 newCols.AddName(name);
390 newCols.AddColumn(jittedCustomColumn, name);
391
392 fLoopManager->RegisterCustomColumn(jittedCustomColumn.get());
393
394 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
395
396 return newInterface;
397 }
398
399 ////////////////////////////////////////////////////////////////////////////
400 /// \brief Allow to refer to a column with a different name
401 /// \param[in] alias name of the column alias
402 /// \param[in] columnName of the column to be aliased
403 /// \return the first node of the computation graph for which the alias is available.
404 ///
405 /// Aliasing an alias is supported.
406 ///
407 /// ### Example usage:
408 /// ~~~{.cpp}
409 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
410 /// ~~~
412 {
413 // The symmetry with Define is clear. We want to:
414 // - Create globally the alias and return this very node, unchanged
415 // - Make aliases accessible based on chains and not globally
416
417 // Helper to find out if a name is a column
418 auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
419
420 // If the alias name is a column name, there is a problem
422 fLoopManager->GetAliasMap(), dsColumnNames);
423
424 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
425
426 fLoopManager->AddColumnAlias(std::string(alias), validColumnName);
427
429
430 newCols.AddName(alias);
431 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
432
433 return newInterface;
434 }
435
436 ////////////////////////////////////////////////////////////////////////////
437 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
438 /// \tparam ColumnTypes variadic list of branch/column types.
439 /// \param[in] treename The name of the output TTree.
440 /// \param[in] filename The name of the output TFile.
441 /// \param[in] columnList The list of names of the columns/branches to be written.
442 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
443 /// \return a `RDataFrame` that wraps the snapshotted dataset.
444 ///
445 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
446 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
447 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
448 /// written out and it appears before the array in the columnList.
449 ///
450 /// ### Example invocations:
451 ///
452 /// ~~~{.cpp}
453 /// // without specifying template parameters (column types automatically deduced)
454 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
455 ///
456 /// // specifying template parameters ("x" is `int`, "y" is `float`)
457 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
458 /// ~~~
459 ///
460 /// #### Using Snapshot as a lazy action
461 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
462 /// `RSnapshotOptions`:
463 /// ~~~{.cpp}
464 /// RSnapshotOptions opts;
465 /// opts.fLazy = true;
466 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
467 /// ~~~
468 template <typename... ColumnTypes>
470 Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
471 const RSnapshotOptions &options = RSnapshotOptions())
472 {
473 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
474 }
475
476 ////////////////////////////////////////////////////////////////////////////
477 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
478 /// \param[in] treename The name of the output TTree.
479 /// \param[in] filename The name of the output TFile.
480 /// \param[in] columnList The list of names of the columns/branches to be written.
481 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
482 /// \return a `RDataFrame` that wraps the snapshotted dataset.
483 ///
484 /// This function returns a `RDataFrame` built with the output tree as a source.
485 /// The types of the columns are automatically inferred and do not need to be specified.
486 ///
487 /// See above for a more complete description and example usages.
489 const ColumnNames_t &columnList,
490 const RSnapshotOptions &options = RSnapshotOptions())
491 {
492 // Early return: if the list of columns is empty, just return an empty RDF
493 // If we proceed, the jitted call will not compile!
494 if (columnList.empty()) {
495 auto nEntries = *this->Count();
496 auto snapshotRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(nEntries));
497 return MakeResultPtr(snapshotRDF, *fLoopManager, nullptr);
498 }
499 auto tree = fLoopManager->GetTree();
500 const auto nsID = fLoopManager->GetID();
501 std::stringstream snapCall;
502 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
503 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
505
506 // build a string equivalent to
507 // "resPtr = (RInterface<nodetype*>*)(this)->Snapshot<Ts...>(args...)"
509 snapCall << "*reinterpret_cast<ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>>*>("
511 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
512 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Snapshot<";
513
514 const auto &customCols = fCustomColumns.GetNames();
515 const auto dontConvertVector = false;
516
517 const auto validColumnNames = GetValidatedColumnNames(columnList.size(), columnList);
518
519 for (auto &c : validColumnNames) {
520 const auto isCustom = std::find(customCols.begin(), customCols.end(), c) != customCols.end();
521 const auto customColID = isCustom ? fCustomColumns.GetColumns()[c]->GetID() : 0;
522 snapCall << RDFInternal::ColumnName2ColumnTypeName(c, nsID, tree, fDataSource, isCustom, dontConvertVector,
523 customColID)
524 << ", ";
525 };
526 if (!columnList.empty())
527 snapCall.seekp(-2, snapCall.cur); // remove the last ",
528 snapCall << ">(\"" << treename << "\", \"" << filename << "\", "
529 << "*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
530 << RDFInternal::PrettyPrintAddr(&columnList) << "),"
531 << "*reinterpret_cast<ROOT::RDF::RSnapshotOptions*>(" << RDFInternal::PrettyPrintAddr(&options) << "));";
532 // jit snapCall, return result
533 TInterpreter::EErrorCode errorCode;
534 gInterpreter->Calc(snapCall.str().c_str(), &errorCode);
535 if (TInterpreter::EErrorCode::kNoError != errorCode) {
536 std::string msg = "Cannot jit Snapshot call. Interpreter error code is " + std::to_string(errorCode) + ".";
537 throw std::runtime_error(msg);
538 }
539 return resPtr;
540 }
541
542 // clang-format off
543 ////////////////////////////////////////////////////////////////////////////
544 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
545 /// \param[in] treename The name of the output TTree.
546 /// \param[in] filename The name of the output TFile.
547 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. See the documentation of TRegexp for more details. An empty string signals the selection of all columns.
548 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
549 /// \return a `RDataFrame` that wraps the snapshotted dataset.
550 ///
551 /// This function returns a `RDataFrame` built with the output tree as a source.
552 /// The types of the columns are automatically inferred and do not need to be specified.
553 ///
554 /// See above for a more complete description and example usages.
556 std::string_view columnNameRegexp = "",
557 const RSnapshotOptions &options = RSnapshotOptions())
558 {
559 auto selectedColumns = RDFInternal::ConvertRegexToColumns(*this, columnNameRegexp, "Snapshot");
560 return Snapshot(treename, filename, selectedColumns, options);
561 }
562 // clang-format on
563
564 // clang-format off
565 ////////////////////////////////////////////////////////////////////////////
566 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
567 /// \param[in] treename The name of the output TTree.
568 /// \param[in] filename The name of the output TFile.
569 /// \param[in] columnList The list of names of the columns/branches to be written.
570 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
571 /// \return a `RDataFrame` that wraps the snapshotted dataset.
572 ///
573 /// This function returns a `RDataFrame` built with the output tree as a source.
574 /// The types of the columns are automatically inferred and do not need to be specified.
575 ///
576 /// See above for a more complete description and example usages.
578 std::initializer_list<std::string> columnList,
579 const RSnapshotOptions &options = RSnapshotOptions())
580 {
581 ColumnNames_t selectedColumns(columnList);
582 return Snapshot(treename, filename, selectedColumns, options);
583 }
584 // clang-format on
585
586 ////////////////////////////////////////////////////////////////////////////
587 /// \brief Save selected columns in memory
588 /// \tparam ColumnTypes variadic list of branch/column types.
589 /// \param[in] columns to be cached in memory.
590 /// \return a `RDataFrame` that wraps the cached dataset.
591 ///
592 /// This action returns a new `RDataFrame` object, completely detached from
593 /// the originating `RDataFrame`. The new dataframe only contains the cached
594 /// columns and stores their content in memory for fast, zero-copy subsequent access.
595 ///
596 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
597 /// fits in memory and that will be accessed many times.
598 template <typename... ColumnTypes>
600 {
601 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
602 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
603 }
604
605 ////////////////////////////////////////////////////////////////////////////
606 /// \brief Save selected columns in memory
607 /// \param[in] columns to be cached in memory
608 /// \return a `RDataFrame` that wraps the cached dataset.
609 ///
610 /// See the previous overloads for more information.
612 {
613 // Early return: if the list of columns is empty, just return an empty RDF
614 // If we proceed, the jitted call will not compile!
615 if (columnList.empty()) {
616 auto nEntries = *this->Count();
617 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
618 return emptyRDF;
619 }
620
621 auto tree = fLoopManager->GetTree();
622 const auto nsID = fLoopManager->GetID();
623 std::stringstream cacheCall;
624 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
625 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
627 // build a string equivalent to
628 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
629 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
630 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
632 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
633 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
634
635 const auto &customCols = fCustomColumns.GetNames();
636 for (auto &c : columnList) {
637 const auto isCustom = std::find(customCols.begin(), customCols.end(), c) != customCols.end();
638 const auto customColID = isCustom ? fCustomColumns.GetColumns()[c]->GetID() : 0;
639 cacheCall << RDFInternal::ColumnName2ColumnTypeName(c, nsID, tree, fDataSource, isCustom,
640 /*vector2rvec=*/true, customColID)
641 << ", ";
642 };
643 if (!columnList.empty())
644 cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
645 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
646 << RDFInternal::PrettyPrintAddr(&columnList) << "));";
647 // jit cacheCall, return result
648 TInterpreter::EErrorCode errorCode;
649 gInterpreter->Calc(cacheCall.str().c_str(), &errorCode);
650 if (TInterpreter::EErrorCode::kNoError != errorCode) {
651 std::string msg = "Cannot jit Cache call. Interpreter error code is " + std::to_string(errorCode) + ".";
652 throw std::runtime_error(msg);
653 }
654 return resRDF;
655 }
656
657 ////////////////////////////////////////////////////////////////////////////
658 /// \brief Save selected columns in memory
659 /// \param[in] a regular expression to select the columns
660 /// \return a `RDataFrame` that wraps the cached dataset.
661 ///
662 /// The existing columns are matched against the regeular expression. If the string provided
663 /// is empty, all columns are selected. See the previous overloads for more information.
665 {
666 auto selectedColumns = RDFInternal::ConvertRegexToColumns(*this, columnNameRegexp, "Cache");
667 return Cache(selectedColumns);
668 }
669
670 ////////////////////////////////////////////////////////////////////////////
671 /// \brief Save selected columns in memory
672 /// \param[in] columns to be cached in memory.
673 /// \return a `RDataFrame` that wraps the cached dataset.
674 ///
675 /// See the previous overloads for more information.
676 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
677 {
678 ColumnNames_t selectedColumns(columnList);
679 return Cache(selectedColumns);
680 }
681
682 // clang-format off
683 ////////////////////////////////////////////////////////////////////////////
684 /// \brief Creates a node that filters entries based on range: [begin, end)
685 /// \param[in] begin Initial entry number considered for this range.
686 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
687 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
688 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
689 ///
690 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
691 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
692 // clang-format on
693 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
694 {
695 // check invariants
696 if (stride == 0 || (end != 0 && end < begin))
697 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
698 CheckIMTDisabled("Range");
699
701 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
702 fLoopManager->Book(rangePtr.get());
704 return tdf_r;
705 }
706
707 // clang-format off
708 ////////////////////////////////////////////////////////////////////////////
709 /// \brief Creates a node that filters entries based on range
710 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
711 /// \return a node of the computation graph for which the range is defined.
712 ///
713 /// See the other Range overload for a detailed description.
714 // clang-format on
715 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
716
717 // clang-format off
718 ////////////////////////////////////////////////////////////////////////////
719 /// \brief Execute a user-defined function on each entry (*instant action*)
720 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
721 /// \param[in] columns Names of the columns/branches in input to the user function.
722 ///
723 /// The callable `f` is invoked once per entry. This is an *instant action*:
724 /// upon invocation, an event loop as well as execution of all scheduled actions
725 /// is triggered.
726 /// Users are responsible for the thread-safety of this callable when executing
727 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
728 // clang-format on
729 template <typename F>
730 void Foreach(F f, const ColumnNames_t &columns = {})
731 {
732 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
733 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
734 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
735 }
736
737 // clang-format off
738 ////////////////////////////////////////////////////////////////////////////
739 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*)
740 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
741 /// \param[in] columns Names of the columns/branches in input to the user function.
742 ///
743 /// Same as `Foreach`, but the user-defined function takes an extra
744 /// `unsigned int` as its first parameter, the *processing slot index*.
745 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
746 /// for each thread of execution.
747 /// This is meant as a helper in writing thread-safe `Foreach`
748 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
749 /// The user-defined processing callable is able to follow different
750 /// *streams of processing* indexed by the first parameter.
751 /// `ForeachSlot` works just as well with single-thread execution: in that
752 /// case `slot` will always be `0`.
753 // clang-format on
754 template <typename F>
755 void ForeachSlot(F f, const ColumnNames_t &columns = {})
756 {
757 using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
758 constexpr auto nColumns = ColTypes_t::list_size;
759
760 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
761
762 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
763
764 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
766
767 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, newColumns);
768 fLoopManager->Book(action.get());
769
770 fLoopManager->Run();
771 }
772
773 // clang-format off
774 ////////////////////////////////////////////////////////////////////////////
775 /// \brief Execute a user-defined reduce operation on the values of a column.
776 /// \tparam F The type of the reduce callable. Automatically deduced.
777 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
778 /// \param[in] f A callable with signature `T(T,T)`
779 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
780 /// \return the reduced quantity wrapped in a `RResultPtr`.
781 ///
782 /// A reduction takes two values of a column and merges them into one (e.g.
783 /// by summing them, taking the maximum, etc). This action performs the
784 /// specified reduction operation on all processed column values, returning
785 /// a single value of the same type. The callable f must satisfy the general
786 /// requirements of a *processing function* besides having signature `T(T,T)`
787 /// where `T` is the type of column columnName.
788 ///
789 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
790 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
791 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
792 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
793 /// overload.
794 ///
795 /// This action is *lazy*: upon invocation of this method the calculation is
796 /// booked but not executed. See RResultPtr documentation.
797 // clang-format on
798 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
800 {
801 static_assert(
802 std::is_default_constructible<T>::value,
803 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
804 return Reduce(std::move(f), columnName, T());
805 }
806
807 ////////////////////////////////////////////////////////////////////////////
808 /// \brief Execute a user-defined reduce operation on the values of a column.
809 /// \tparam F The type of the reduce callable. Automatically deduced.
810 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
811 /// \param[in] f A callable with signature `T(T,T)`
812 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
813 /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
814 /// \return the reduced quantity wrapped in a `RResultPtr`.
815 ///
816 /// See the description of the first Reduce overload for more information.
817 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
818 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
819 {
820 return Aggregate(f, f, columnName, redIdentity);
821 }
822
823 ////////////////////////////////////////////////////////////////////////////
824 /// \brief Return the number of entries processed (*lazy action*)
825 /// \return the number of entries wrapped in a `RResultPtr`.
826 ///
827 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
828 /// This action is *lazy*: upon invocation of this method the calculation is
829 /// booked but not executed. See RResultPtr documentation.
831 {
832 const auto nSlots = fLoopManager->GetNSlots();
833 auto cSPtr = std::make_shared<ULong64_t>(0);
834 using Helper_t = RDFInternal::CountHelper;
836 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr, fCustomColumns);
837 fLoopManager->Book(action.get());
838 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
839 }
840
841 ////////////////////////////////////////////////////////////////////////////
842 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default)
843 /// \tparam T The type of the column.
844 /// \tparam COLL The type of collection used to store the values.
845 /// \param[in] column The name of the column to collect the values of.
846 /// \return the content of the selected column wrapped in a `RResultPtr`.
847 ///
848 /// The collection type to be specified for C-style array columns is `RVec<T>`:
849 /// in this case the returned collection is a `std::vector<RVec<T>>`.
850 /// ### Example usage:
851 /// ~~~{.cpp}
852 /// // In this case intCol is a std::vector<int>
853 /// auto intCol = rdf.Take<int>("integerColumn");
854 /// // Same content as above but in this case taken as a RVec<int>
855 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
856 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
857 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
858 /// ~~~
859 /// This action is *lazy*: upon invocation of this method the calculation is
860 /// booked but not executed. See RResultPtr documentation.
861 template <typename T, typename COLL = std::vector<T>>
863 {
864 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
865
866 const auto validColumnNames = GetValidatedColumnNames(1, columns);
867
868 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<1>(), TTraits::TypeList<T>());
869
870 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
872 auto valuesPtr = std::make_shared<COLL>();
873 const auto nSlots = fLoopManager->GetNSlots();
874
875 auto action = std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, newColumns);
876 fLoopManager->Book(action.get());
877 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
878 }
879
880 ////////////////////////////////////////////////////////////////////////////
881 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
882 /// \tparam V The type of the column used to fill the histogram.
883 /// \param[in] model The returned histogram will be constructed using this as a model.
884 /// \param[in] vName The name of the column that will fill the histogram.
885 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
886 ///
887 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
888 /// is filled with each one of the elements of the container. In case multiple columns of container type
889 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
890 /// possibly different lengths between events).
891 /// This action is *lazy*: upon invocation of this method the calculation is
892 /// booked but not executed. See RResultPtr documentation.
893 /// The user gives up ownership of the model histogram.
894 template <typename V = RDFDetail::RInferredType>
895 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
896 {
897 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
898 std::shared_ptr<::TH1D> h(nullptr);
899 {
900 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
901 h = model.GetHistogram();
902 h->SetDirectory(nullptr);
903 }
904
905 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
906 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
907 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(userColumns, h);
908 }
909
910 ////////////////////////////////////////////////////////////////////////////
911 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
912 /// \tparam V The type of the column used to fill the histogram.
913 /// \param[in] vName The name of the column that will fill the histogram.
914 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
915 ///
916 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
917 /// The "name" and "title" strings are built starting from the input column name.
918 /// See the description of the first Histo1D overload for more details.
919 template <typename V = RDFDetail::RInferredType>
921 {
922 const auto h_name = std::string(vName);
923 const auto h_title = h_name + ";" + h_name + ";";
924 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
925 }
926
927 ////////////////////////////////////////////////////////////////////////////
928 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
929 /// \tparam V The type of the column used to fill the histogram.
930 /// \tparam W The type of the column used as weights.
931 /// \param[in] model The returned histogram will be constructed using this as a model.
932 /// \param[in] vName The name of the column that will fill the histogram.
933 /// \param[in] wName The name of the column that will provide the weights.
934 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
935 ///
936 /// See the description of the first Histo1D overload for more details.
937 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
939 {
940 const std::vector<std::string_view> columnViews = {vName, wName};
941 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
942 ? ColumnNames_t()
943 : ColumnNames_t(columnViews.begin(), columnViews.end());
944 std::shared_ptr<::TH1D> h(nullptr);
945 {
946 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
947 h = model.GetHistogram();
948 }
949 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h);
950 }
951
952 ////////////////////////////////////////////////////////////////////////////
953 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
954 /// \tparam V The type of the column used to fill the histogram.
955 /// \tparam W The type of the column used as weights.
956 /// \param[in] vName The name of the column that will fill the histogram.
957 /// \param[in] wName The name of the column that will provide the weights.
958 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
959 ///
960 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
961 /// The "name" and "title" strings are built starting from the input column names.
962 /// See the description of the first Histo1D overload for more details.
963 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
965 {
966 // We build name and title based on the value and weight column names
967 const auto h_name = std::string(vName) + "*" + std::string(wName);
968 const auto h_title = h_name + ";" + h_name + ";";
969 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
970 }
971
972 ////////////////////////////////////////////////////////////////////////////
973 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
974 /// \tparam V The type of the column used to fill the histogram.
975 /// \tparam W The type of the column used as weights.
976 /// \param[in] model The returned histogram will be constructed using this as a model.
977 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
978 ///
979 /// This overload will use the first two default columns as column names.
980 /// See the description of the first Histo1D overload for more details.
981 template <typename V, typename W>
982 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
983 {
984 return Histo1D<V, W>(model, "", "");
985 }
986
987 ////////////////////////////////////////////////////////////////////////////
988 /// \brief Fill and return a two-dimensional histogram (*lazy action*)
989 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
990 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
991 /// \param[in] model The returned histogram will be constructed using this as a model.
992 /// \param[in] v1Name The name of the column that will fill the x axis.
993 /// \param[in] v2Name The name of the column that will fill the y axis.
994 /// \return the bidimensional histogram wrapped in a `RResultPtr`.
995 ///
996 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
997 /// is filled with each one of the elements of the container. In case multiple columns of container type
998 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
999 /// possibly different lengths between events).
1000 /// This action is *lazy*: upon invocation of this method the calculation is
1001 /// booked but not executed. See RResultPtr documentation.
1002 /// The user gives up ownership of the model histogram.
1003 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1005 {
1006 std::shared_ptr<::TH2D> h(nullptr);
1007 {
1008 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1009 h = model.GetHistogram();
1010 }
1011 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1012 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1013 }
1014 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1015 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1016 ? ColumnNames_t()
1017 : ColumnNames_t(columnViews.begin(), columnViews.end());
1018 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h);
1019 }
1020
1021 ////////////////////////////////////////////////////////////////////////////
1022 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*)
1023 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1024 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1025 /// \tparam W The type of the column used for the weights of the histogram.
1026 /// \param[in] model The returned histogram will be constructed using this as a model.
1027 /// \param[in] v1Name The name of the column that will fill the x axis.
1028 /// \param[in] v2Name The name of the column that will fill the y axis.
1029 /// \param[in] wName The name of the column that will provide the weights.
1030 /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1031 ///
1032 /// This action is *lazy*: upon invocation of this method the calculation is
1033 /// booked but not executed. See RResultPtr documentation.
1034 /// The user gives up ownership of the model histogram.
1035 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1036 typename W = RDFDetail::RInferredType>
1039 {
1040 std::shared_ptr<::TH2D> h(nullptr);
1041 {
1042 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1043 h = model.GetHistogram();
1044 }
1045 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1046 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1047 }
1048 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1049 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1050 ? ColumnNames_t()
1051 : ColumnNames_t(columnViews.begin(), columnViews.end());
1052 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h);
1053 }
1054
1055 template <typename V1, typename V2, typename W>
1057 {
1058 return Histo2D<V1, V2, W>(model, "", "", "");
1059 }
1060
1061 ////////////////////////////////////////////////////////////////////////////
1062 /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1063 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1064 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1065 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1066 /// \param[in] model The returned histogram will be constructed using this as a model.
1067 /// \param[in] v1Name The name of the column that will fill the x axis.
1068 /// \param[in] v2Name The name of the column that will fill the y axis.
1069 /// \param[in] v3Name The name of the column that will fill the z axis.
1070 /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1071 ///
1072 /// This action is *lazy*: upon invocation of this method the calculation is
1073 /// booked but not executed. See RResultPtr documentation.
1074 /// The user gives up ownership of the model histogram.
1075 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1076 typename V3 = RDFDetail::RInferredType>
1078 std::string_view v3Name = "")
1079 {
1080 std::shared_ptr<::TH3D> h(nullptr);
1081 {
1082 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1083 h = model.GetHistogram();
1084 }
1085 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1086 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1087 }
1088 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1089 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1090 ? ColumnNames_t()
1091 : ColumnNames_t(columnViews.begin(), columnViews.end());
1092 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h);
1093 }
1094
1095 ////////////////////////////////////////////////////////////////////////////
1096 /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1097 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1098 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1099 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1100 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1101 /// \param[in] model The returned histogram will be constructed using this as a model.
1102 /// \param[in] v1Name The name of the column that will fill the x axis.
1103 /// \param[in] v2Name The name of the column that will fill the y axis.
1104 /// \param[in] v3Name The name of the column that will fill the z axis.
1105 /// \param[in] wName The name of the column that will provide the weights.
1106 /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1107 ///
1108 /// This action is *lazy*: upon invocation of this method the calculation is
1109 /// booked but not executed. See RResultPtr documentation.
1110 /// The user gives up ownership of the model histogram.
1111 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1112 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1114 std::string_view v3Name, std::string_view wName)
1115 {
1116 std::shared_ptr<::TH3D> h(nullptr);
1117 {
1118 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1119 h = model.GetHistogram();
1120 }
1121 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1122 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1123 }
1124 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1125 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1126 ? ColumnNames_t()
1127 : ColumnNames_t(columnViews.begin(), columnViews.end());
1128 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h);
1129 }
1130
1131 template <typename V1, typename V2, typename V3, typename W>
1133 {
1134 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1135 }
1136
1137 ////////////////////////////////////////////////////////////////////////////
1138 /// \brief Fill and return a graph (*lazy action*)
1139 /// \tparam V1 The type of the column used to fill the x axis of the graph.
1140 /// \tparam V2 The type of the column used to fill the y axis of the graph.
1141 /// \param[in] v1Name The name of the column that will fill the x axis.
1142 /// \param[in] v2Name The name of the column that will fill the y axis.
1143 /// \return the graph wrapped in a `RResultPtr`.
1144 ///
1145 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1146 /// is filled with each one of the elements of the container.
1147 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1148 /// If the Graph has to be drawn, it is suggested to the user to sort it on the x before printing.
1149 /// A name and a title to the graph is given based on the input column names.
1150 ///
1151 /// This action is *lazy*: upon invocation of this method the calculation is
1152 /// booked but not executed. See RResultPtr documentation.
1153 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1155 {
1156 auto graph = std::make_shared<::TGraph>();
1157 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1158 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1159 ? ColumnNames_t()
1160 : ColumnNames_t(columnViews.begin(), columnViews.end());
1161
1162 // We build a default name and title based on the input columns
1163 if (!(userColumns[0].empty() && userColumns[1].empty())) {
1164 const auto v2Name_str = std::string(v2Name);
1165 const auto g_name = std::string(v1Name) + "*" + v2Name_str;
1166 graph->SetNameTitle(g_name.c_str(), g_name.c_str());
1167 graph->GetXaxis()->SetTitle(g_name.c_str());
1168 graph->GetYaxis()->SetTitle(v2Name_str.c_str());
1169 }
1170
1171 return CreateAction<RDFInternal::ActionTags::Graph, V1, V2>(userColumns, graph);
1172 }
1173
1174 ////////////////////////////////////////////////////////////////////////////
1175 /// \brief Fill and return a one-dimensional profile (*lazy action*)
1176 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1177 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1178 /// \param[in] model The model to be considered to build the new return value.
1179 /// \param[in] v1Name The name of the column that will fill the x axis.
1180 /// \param[in] v2Name The name of the column that will fill the y axis.
1181 /// \return the monodimensional profile wrapped in a `RResultPtr`.
1182 ///
1183 /// This action is *lazy*: upon invocation of this method the calculation is
1184 /// booked but not executed. See RResultPtr documentation.
1185 /// The user gives up ownership of the model profile object.
1186 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1189 {
1190 std::shared_ptr<::TProfile> h(nullptr);
1191 {
1192 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1193 h = model.GetProfile();
1194 }
1195
1196 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1197 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1198 }
1199 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1200 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1201 ? ColumnNames_t()
1202 : ColumnNames_t(columnViews.begin(), columnViews.end());
1203 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h);
1204 }
1205
1206 ////////////////////////////////////////////////////////////////////////////
1207 /// \brief Fill and return a one-dimensional profile (*lazy action*)
1208 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1209 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1210 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1211 /// \param[in] model The model to be considered to build the new return value.
1212 /// \param[in] v1Name The name of the column that will fill the x axis.
1213 /// \param[in] v2Name The name of the column that will fill the y axis.
1214 /// \param[in] wName The name of the column that will provide the weights.
1215 /// \return the monodimensional profile wrapped in a `RResultPtr`.
1216 ///
1217 /// This action is *lazy*: upon invocation of this method the calculation is
1218 /// booked but not executed. See RResultPtr documentation.
1219 /// The user gives up ownership of the model profile object.
1220 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1221 typename W = RDFDetail::RInferredType>
1224 {
1225 std::shared_ptr<::TProfile> h(nullptr);
1226 {
1227 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1228 h = model.GetProfile();
1229 }
1230
1231 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1232 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1233 }
1234 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1235 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1236 ? ColumnNames_t()
1237 : ColumnNames_t(columnViews.begin(), columnViews.end());
1238 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h);
1239 }
1240
1241 template <typename V1, typename V2, typename W>
1243 {
1244 return Profile1D<V1, V2, W>(model, "", "", "");
1245 }
1246
1247 ////////////////////////////////////////////////////////////////////////////
1248 /// \brief Fill and return a two-dimensional profile (*lazy action*)
1249 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1250 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1251 /// \tparam V2 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1252 /// \param[in] model The returned profile will be constructed using this as a model.
1253 /// \param[in] v1Name The name of the column that will fill the x axis.
1254 /// \param[in] v2Name The name of the column that will fill the y axis.
1255 /// \param[in] v3Name The name of the column that will fill the z axis.
1256 /// \return the bidimensional profile wrapped in a `RResultPtr`.
1257 ///
1258 /// This action is *lazy*: upon invocation of this method the calculation is
1259 /// booked but not executed. See RResultPtr documentation.
1260 /// The user gives up ownership of the model profile.
1261 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1262 typename V3 = RDFDetail::RInferredType>
1264 std::string_view v2Name = "", std::string_view v3Name = "")
1265 {
1266 std::shared_ptr<::TProfile2D> h(nullptr);
1267 {
1268 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1269 h = model.GetProfile();
1270 }
1271
1272 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1273 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1274 }
1275 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1276 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1277 ? ColumnNames_t()
1278 : ColumnNames_t(columnViews.begin(), columnViews.end());
1279 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h);
1280 }
1281
1282 ////////////////////////////////////////////////////////////////////////////
1283 /// \brief Fill and return a two-dimensional profile (*lazy action*)
1284 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1285 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1286 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1287 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1288 /// \param[in] model The returned histogram will be constructed using this as a model.
1289 /// \param[in] v1Name The name of the column that will fill the x axis.
1290 /// \param[in] v2Name The name of the column that will fill the y axis.
1291 /// \param[in] v3Name The name of the column that will fill the z axis.
1292 /// \param[in] wName The name of the column that will provide the weights.
1293 /// \return the bidimensional profile wrapped in a `RResultPtr`.
1294 ///
1295 /// This action is *lazy*: upon invocation of this method the calculation is
1296 /// booked but not executed. See RResultPtr documentation.
1297 /// The user gives up ownership of the model profile.
1298 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1299 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1301 std::string_view v3Name, std::string_view wName)
1302 {
1303 std::shared_ptr<::TProfile2D> h(nullptr);
1304 {
1305 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1306 h = model.GetProfile();
1307 }
1308
1309 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1310 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1311 }
1312 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1313 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1314 ? ColumnNames_t()
1315 : ColumnNames_t(columnViews.begin(), columnViews.end());
1316 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h);
1317 }
1318
1319 template <typename V1, typename V2, typename V3, typename W>
1321 {
1322 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
1323 }
1324
1325 ////////////////////////////////////////////////////////////////////////////
1326 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1327 ///
1328 /// T must be a type that provides a copy- or move-constructor and a `T::Fill` method that takes as many arguments
1329 /// as the column names pass as columnList. The arguments of `T::Fill` must have type equal to the one of the
1330 /// specified columns (these types are passed as template parameters to this method).
1331 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object.
1332 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1333 /// \tparam T The type of the object to fill. Automatically deduced.
1334 /// \param[in] model The model to be considered to build the new return value.
1335 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1336 /// \return the filled object wrapped in a `RResultPtr`.
1337 ///
1338 /// The user gives up ownership of the model object.
1339 /// The list of column names to be used for filling must always be specified.
1340 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1341 /// See RResultPtr documentation.
1342 template <typename FirstColumn, typename... OtherColumns, typename T> // need FirstColumn to disambiguate overloads
1344 {
1345 auto h = std::make_shared<T>(std::forward<T>(model));
1346 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1347 throw std::runtime_error("The absence of axes limits is not supported yet.");
1348 }
1349 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h);
1350 }
1351
1352 ////////////////////////////////////////////////////////////////////////////
1353 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1354 ///
1355 /// This overload infers the types of the columns specified in columnList at runtime and just-in-time compiles the
1356 /// method with these types. See previous overload for more information.
1357 /// \tparam T The type of the object to fill. Automatically deduced.
1358 /// \param[in] model The model to be considered to build the new return value.
1359 /// \param[in] columnList The name of the columns read to fill the object.
1360 /// \return the filled object wrapped in a `RResultPtr`.
1361 ///
1362 /// This overload of `Fill` infers the type of the specified columns at runtime and just-in-time compiles the
1363 /// previous overload. Check the previous overload for more details on `Fill`.
1364 template <typename T>
1366 {
1367 auto h = std::make_shared<T>(std::forward<T>(model));
1368 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1369 throw std::runtime_error("The absence of axes limits is not supported yet.");
1370 }
1371 return CreateAction<RDFInternal::ActionTags::Fill, RDFDetail::RInferredType>(bl, h, bl.size());
1372 }
1373
1374 ////////////////////////////////////////////////////////////////////////////
1375 /// \brief Return the minimum of processed column values (*lazy action*)
1376 /// \tparam T The type of the branch/column.
1377 /// \param[in] columnName The name of the branch/column to be treated.
1378 /// \return the minimum value of the selected column wrapped in a `RResultPtr`.
1379 ///
1380 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1381 /// template specialization of this method.
1382 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1383 ///
1384 /// This action is *lazy*: upon invocation of this method the calculation is
1385 /// booked but not executed. See RResultPtr documentation.
1386 template <typename T = RDFDetail::RInferredType>
1388 {
1389 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1390 using RetType_t = RDFDetail::MinReturnType_t<T>;
1391 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
1392 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV);
1393 }
1394
1395 ////////////////////////////////////////////////////////////////////////////
1396 /// \brief Return the maximum of processed column values (*lazy action*)
1397 /// \tparam T The type of the branch/column.
1398 /// \param[in] columnName The name of the branch/column to be treated.
1399 /// \return the maximum value of the selected column wrapped in a `RResultPtr`.
1400 ///
1401 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1402 /// template specialization of this method.
1403 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1404 ///
1405 /// This action is *lazy*: upon invocation of this method the calculation is
1406 /// booked but not executed. See RResultPtr documentation.
1407 template <typename T = RDFDetail::RInferredType>
1409 {
1410 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1411 using RetType_t = RDFDetail::MaxReturnType_t<T>;
1412 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
1413 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV);
1414 }
1415
1416 ////////////////////////////////////////////////////////////////////////////
1417 /// \brief Return the mean of processed column values (*lazy action*)
1418 /// \tparam T The type of the branch/column.
1419 /// \param[in] columnName The name of the branch/column to be treated.
1420 /// \return the mean value of the selected column wrapped in a `RResultPtr`.
1421 ///
1422 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1423 /// template specialization of this method.
1424 ///
1425 /// This action is *lazy*: upon invocation of this method the calculation is
1426 /// booked but not executed. See RResultPtr documentation.
1427 template <typename T = RDFDetail::RInferredType>
1429 {
1430 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1431 auto meanV = std::make_shared<double>(0);
1432 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV);
1433 }
1434
1435 ////////////////////////////////////////////////////////////////////////////
1436 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*)
1437 /// \tparam T The type of the branch/column.
1438 /// \param[in] columnName The name of the branch/column to be treated.
1439 /// \return the standard deviation value of the selected column wrapped in a `RResultPtr`.
1440 ///
1441 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1442 /// template specialization of this method.
1443 ///
1444 /// This action is *lazy*: upon invocation of this method the calculation is
1445 /// booked but not executed. See RResultPtr documentation.
1446 template <typename T = RDFDetail::RInferredType>
1448 {
1449 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1450 auto stdDeviationV = std::make_shared<double>(0);
1451 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV);
1452 }
1453
1454 // clang-format off
1455 ////////////////////////////////////////////////////////////////////////////
1456 /// \brief Return the sum of processed column values (*lazy action*)
1457 /// \tparam T The type of the branch/column.
1458 /// \param[in] columnName The name of the branch/column.
1459 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1460 /// \return the sum of the selected column wrapped in a `RResultPtr`.
1461 ///
1462 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1463 /// template specialization of this method.
1464 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1465 ///
1466 /// This action is *lazy*: upon invocation of this method the calculation is
1467 /// booked but not executed. See RResultPtr documentation.
1468 template <typename T = RDFDetail::RInferredType>
1470 Sum(std::string_view columnName = "",
1471 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
1472 {
1473 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1474 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
1475 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV);
1476 }
1477 // clang-format on
1478
1479 ////////////////////////////////////////////////////////////////////////////
1480 /// \brief Gather filtering statistics
1481 /// \return the resulting `RCutFlowReport` instance wrapped in a `RResultPtr`.
1482 ///
1483 /// Calling `Report` on the main `RDataFrame` object gathers stats for
1484 /// all named filters in the call graph. Calling this method on a
1485 /// stored chain state (i.e. a graph node different from the first) gathers
1486 /// the stats for all named filters in the chain section between the original
1487 /// `RDataFrame` and that node (included). Stats are gathered in the same
1488 /// order as the named filters have been added to the graph.
1489 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
1490 /// effects cuts had.
1491 ///
1492 /// This action is *lazy*: upon invocation of
1493 /// this method the calculation is booked but not executed. See RResultPtr
1494 /// documentation.
1495
1497 {
1498 bool returnEmptyReport = false;
1499 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
1500 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
1501 // certainly does not contain named filters.
1502 // The number 4 takes into account the implicit columns for entry and slot number
1503 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
1504 if (std::is_same<Proxied, RLoopManager>::value && fCustomColumns.GetNames().size() > 4)
1505 returnEmptyReport = true;
1506
1507 auto rep = std::make_shared<RCutFlowReport>();
1508 using Helper_t = RDFInternal::ReportHelper<Proxied>;
1510
1511 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
1513
1514 fLoopManager->Book(action.get());
1515 return MakeResultPtr(rep, *fLoopManager, std::move(action));
1516 }
1517
1518 /////////////////////////////////////////////////////////////////////////////
1519 /// \brief Returns the names of the available columns
1520 /// \return the container of column names.
1521 ///
1522 /// This is not an action nor a transformation, just a query to the RDataFrame object.
1524 {
1525 ColumnNames_t allColumns;
1526
1527 auto addIfNotInternal = [&allColumns](std::string_view colName) {
1528 if (!RDFInternal::IsInternalColumn(colName))
1529 allColumns.emplace_back(colName);
1530 };
1531
1532 auto columnNames = fCustomColumns.GetNames();
1533
1534 std::for_each(columnNames.begin(), columnNames.end(), addIfNotInternal);
1535
1536 auto tree = fLoopManager->GetTree();
1537 if (tree) {
1538 auto branchNames = RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false);
1539 allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
1540 }
1541
1542 if (fDataSource) {
1543 auto &dsColNames = fDataSource->GetColumnNames();
1544 allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end());
1545 }
1546
1547 return allColumns;
1548 }
1549
1550 /////////////////////////////////////////////////////////////////////////////
1551 /// \brief Return the type of a given column as a string.
1552 /// \return the type of the required column.
1553 ///
1554 /// This is not an action nor a transformation, just a query to the RDataFrame object.
1556 {
1557 const auto &customCols = fCustomColumns.GetNames();
1558 const bool convertVector2RVec = true;
1559 const auto isCustom = std::find(customCols.begin(), customCols.end(), column) != customCols.end();
1560 if (!isCustom) {
1561 return RDFInternal::ColumnName2ColumnTypeName(std::string(column), fLoopManager->GetID(),
1563 convertVector2RVec);
1564 } else {
1565 // must convert the alias "__tdf::column_type" to a readable type
1566 const auto colID = std::to_string(fCustomColumns.GetColumns()[std::string(column)]->GetID());
1567 const auto call = "ROOT::Internal::RDF::TypeID2TypeName(typeid(__tdf" + std::to_string(fLoopManager->GetID()) +
1568 "::" + std::string(column) + colID + "_type))";
1569 const auto callRes = gInterpreter->Calc(call.c_str());
1570 return *reinterpret_cast<std::string *>(callRes); // copy result to stack
1571 }
1572 }
1573
1574 /// \brief Returns the names of the filters created.
1575 /// \return the container of filters names.
1576 ///
1577 /// If called on a root node, all the filters in the computation graph will
1578 /// be printed. For any other node, only the filters upstream of that node.
1579 /// Filters without a name are printed as "Unnamed Filter"
1580 /// This is not an action nor a transformation, just a query to the RDataFrame object.
1581 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
1582
1583 /// \brief Returns the names of the defined columns
1584 /// \return the container of the defined column names.
1585 ///
1586 /// This is not an action nor a transformation, just a simple utility to
1587 /// get the columns names that have been defined up to the node.
1588 /// If no custom column has been defined, e.g. on a root node, it returns an
1589 /// empty array.
1591 {
1592 ColumnNames_t definedColumns;
1593
1594 auto columns = fCustomColumns.GetColumns();
1595
1596 for (auto column : columns) {
1597 if (!RDFInternal::IsInternalColumn(column.first) && !column.second->IsDataSourceColumn())
1598 definedColumns.emplace_back(column.first);
1599 }
1600
1601 return definedColumns;
1602 }
1603
1604 // clang-format off
1605 ////////////////////////////////////////////////////////////////////////////
1606 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
1607 /// \tparam F The type of the aggregator callable. Automatically deduced.
1608 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
1609 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1610 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
1611 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
1612 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
1613 /// \param[in] aggIdentity The aggregator variable of each thread is initialised to this value (or is default-constructed if the parameter is omitted)
1614 /// \return the result of the aggregation wrapped in a `RResultPtr`.
1615 ///
1616 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
1617 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
1618 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
1619 /// the value of the column columnName.
1620 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
1621 /// Otherwise the signature of aggregator must be `void(U&,T)`.
1622 ///
1623 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
1624 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
1625 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
1626 ///
1627 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
1628 // clang-format on
1629 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
1630 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
1631 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
1632 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
1633 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
1634 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
1635 {
1636 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
1637 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1638 constexpr auto nColumns = ArgTypes::list_size;
1639
1640 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1641
1642 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ArgTypes());
1643
1644 auto accObjPtr = std::make_shared<U>(aggIdentity);
1645 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
1646 using Action_t = typename RDFInternal::RAction<Helper_t, Proxied>;
1647 auto action = std::make_unique<Action_t>(
1648 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
1649 fProxiedPtr, newColumns);
1650 fLoopManager->Book(action.get());
1651 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
1652 }
1653
1654 // clang-format off
1655 ////////////////////////////////////////////////////////////////////////////
1656 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
1657 /// \tparam F The type of the aggregator callable. Automatically deduced.
1658 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
1659 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1660 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U,T)`, where T is the type of the column, U is the type of the aggregator variable
1661 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
1662 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
1663 /// \return the result of the aggregation wrapped in a `RResultPtr`.
1664 ///
1665 /// See previous Aggregate overload for more information.
1666 // clang-format on
1667 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
1668 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
1669 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
1670 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
1671 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
1672 {
1673 static_assert(
1674 std::is_default_constructible<U>::value,
1675 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
1676 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
1677 }
1678
1679 // clang-format off
1680 ////////////////////////////////////////////////////////////////////////////
1681 /// \brief Book execution of a custom action using a user-defined helper object.
1682 /// \tparam ColumnTypes List of types of columns used by this action.
1683 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
1684 /// \param[in] helper The Action Helper to be scheduled.
1685 /// \param[in] columns The names of the columns on which the helper acts.
1686 /// \return the result of the helper wrapped in a `RResultPtr`.
1687 ///
1688 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
1689 /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
1690 /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
1691 ///
1692 /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
1693 /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
1694 /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
1695 /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
1696 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
1697 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
1698 /// the requested columns for the particular entry being processed.
1699 /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
1700 /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
1701 /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
1702 /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations. Can be no-op.
1703 /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
1704 /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
1705 /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
1706 /// method concurrently, but will always pass different 'slot' numbers.
1707 /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
1708 /// Result_t). The RResultPtr returned by Book will point to this object.
1709 ///
1710 /// See $ROOTSYS/tree/treeplayer/inc/ROOT/RDF/ActionHelpers.hxx for the helpers used by standard RDF actions.
1711 // clang-format on
1712 template <typename... ColumnTypes, typename Helper>
1713 RResultPtr<typename Helper::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {})
1714 {
1715 constexpr auto nColumns = sizeof...(ColumnTypes);
1716 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columns.size());
1717
1718 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1719
1720 // TODO add more static sanity checks on Helper
1721 using AH = RDFDetail::RActionImpl<Helper>;
1722 static_assert(std::is_base_of<AH, Helper>::value && std::is_convertible<Helper *, AH *>::value,
1723 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
1724
1725 using Action_t = typename RDFInternal::RAction<Helper, Proxied, TTraits::TypeList<ColumnTypes...>>;
1726 auto resPtr = helper.GetResultPtr();
1727
1728 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
1730
1731 auto action = std::make_unique<Action_t>(Helper(std::forward<Helper>(helper)), validColumnNames, fProxiedPtr,
1733 fLoopManager->Book(action.get());
1734 return MakeResultPtr(resPtr, *fLoopManager, std::move(action));
1735 }
1736
1737 ////////////////////////////////////////////////////////////////////////////
1738 /// \brief Provides a representation of the columns in the dataset
1739 /// \tparam ColumnTypes variadic list of branch/column types.
1740 /// \param[in] columnList Names of the columns to be displayed.
1741 /// \param[in] rows Number of events for each column to be displayed.
1742 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
1743 ///
1744 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
1745 /// form. RDisplay will either print on the standard output a summarized version through `Print()` or will return a
1746 /// complete version through `AsString()`.
1747 template <typename... ColumnTypes>
1748 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
1749 {
1750 CheckIMTDisabled("Display");
1751
1752 auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
1753 return CreateAction<RDFInternal::ActionTags::Display, ColumnTypes...>(columnList, displayer);
1754 }
1755
1756 ////////////////////////////////////////////////////////////////////////////
1757 /// \brief Provides a representation of the columns in the dataset
1758 /// \param[in] columnList Names of the columns to be displayed.
1759 /// \param[in] rows Number of events for each column to be displayed.
1760 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
1761 ///
1762 /// This overload automatically infers the column types.
1763 /// See the previous overloads for further details.
1764 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
1765 {
1766 CheckIMTDisabled("Display");
1767 auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
1768 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(columnList, displayer,
1769 columnList.size());
1770 }
1771
1772 ////////////////////////////////////////////////////////////////////////////
1773 /// \brief Provides a representation of the columns in the dataset
1774 /// \param[in] columnNameRegexp A regular expression to select the columns.
1775 /// \param[in] rows Number of events for each column to be displayed.
1776 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
1777 ///
1778 /// The existing columns are matched against the regular expression. If the string provided
1779 /// is empty, all columns are selected.
1780 /// See the previous overloads for further details.
1781 RResultPtr<RDisplay> Display(std::string_view columnNameRegexp = "", const int &nRows = 5)
1782 {
1783 auto selectedColumns = RDFInternal::ConvertRegexToColumns(*this, columnNameRegexp, "Display");
1784 return Display(selectedColumns, nRows);
1785 }
1786
1787 ////////////////////////////////////////////////////////////////////////////
1788 /// \brief Provides a representation of the columns in the dataset
1789 /// \param[in] columnList Names of the columns to be displayed.
1790 /// \param[in] nRows Number of events for each column to be displayed.
1791 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
1792 ///
1793 /// See the previous overloads for further details.
1794 RResultPtr<RDisplay> Display(std::initializer_list<std::string> columnList, const int &nRows = 5)
1795 {
1796 ColumnNames_t selectedColumns(columnList);
1797 return Display(selectedColumns, nRows);
1798 }
1799
1800private:
1802 {
1804
1805 // Entry number column
1806 const auto entryColName = "rdfentry_";
1807 auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
1808 using NewColEntry_t =
1809 RDFDetail::RCustomColumn<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
1810
1811 auto entryColumn = std::make_shared<NewColEntry_t>(fLoopManager, entryColName, std::move(entryColGen),
1812 newCols.GetNames(), fLoopManager->GetNSlots(), newCols);
1813 newCols.AddName(entryColName);
1814 newCols.AddColumn(entryColumn, entryColName);
1815
1816 fLoopManager->RegisterCustomColumn(entryColumn.get());
1817
1818 // Declare return type to the interpreter, for future use by jitted actions
1819 auto retTypeDeclaration = "namespace __tdf" + std::to_string(fLoopManager->GetID()) + " { using " + entryColName +
1820 std::to_string(entryColumn->GetID()) + "_type = ULong64_t; }";
1821 gInterpreter->Declare(retTypeDeclaration.c_str());
1822
1823 // Slot number column
1824 const auto slotColName = "rdfslot_";
1825 auto slotColGen = [](unsigned int slot) { return slot; };
1826 using NewColSlot_t = RDFDetail::RCustomColumn<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
1827
1828 auto slotColumn = std::make_shared<NewColSlot_t>(fLoopManager, slotColName, std::move(slotColGen),
1829 newCols.GetNames(), fLoopManager->GetNSlots(), newCols);
1830
1831 newCols.AddName(slotColName);
1832 newCols.AddColumn(slotColumn, slotColName);
1833
1834 fLoopManager->RegisterCustomColumn(slotColumn.get());
1835
1836 fCustomColumns = std::move(newCols);
1837
1838 // Declare return type to the interpreter, for future use by jitted actions
1839 retTypeDeclaration = "namespace __tdf" + std::to_string(fLoopManager->GetID()) + " { using " + slotColName +
1840 std::to_string(slotColumn->GetID()) + "_type = unsigned int; }";
1841 gInterpreter->Declare(retTypeDeclaration.c_str());
1842
1843 fLoopManager->AddColumnAlias("tdfentry_", entryColName);
1844 fCustomColumns.AddName("tdfentry_");
1845 fLoopManager->AddColumnAlias("tdfslot_", slotColName);
1846 fCustomColumns.AddName("tdfslot_");
1847 }
1848
1849 std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
1850 {
1851 std::vector<std::string> types;
1852
1853 for (auto column : columnList) {
1854 types.push_back(GetColumnType(column));
1855 }
1856 return types;
1857 }
1858
1860 {
1862 std::string error(callerName);
1863 error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
1864 throw std::runtime_error(error);
1865 }
1866 }
1867
1868 // Type was specified by the user, no need to infer it
1869 template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
1870 typename std::enable_if<!RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
1871 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r)
1872 {
1873 constexpr auto nColumns = sizeof...(BranchTypes);
1874
1875 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1876
1877 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
1879
1880 const auto nSlots = fLoopManager->GetNSlots();
1881
1882 auto action =
1883 RDFInternal::BuildAction<BranchTypes...>(validColumnNames, r, nSlots, fProxiedPtr, ActionTag{}, newColumns);
1884 fLoopManager->Book(action.get());
1885 return MakeResultPtr(r, *fLoopManager, std::move(action));
1886 }
1887
1888 // User did not specify type, do type inference
1889 // This version of CreateAction has a `nColumns` optional argument. If present, the number of required columns for
1890 // this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(BranchTypes)
1891 template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
1892 typename std::enable_if<RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
1894 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r, const int nColumns = -1)
1895 {
1896 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(BranchTypes));
1897
1898 const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
1899 const unsigned int nSlots = fLoopManager->GetNSlots();
1900
1901 auto tree = fLoopManager->GetTree();
1902 auto rOnHeap = RDFInternal::MakeSharedOnHeap(r);
1903
1904 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
1905 using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
1906 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
1907
1908 auto jittedActionOnHeap =
1909 RDFInternal::MakeSharedOnHeap(std::make_shared<RDFInternal::RJittedAction>(*fLoopManager));
1910
1911 auto toJit = RDFInternal::JitBuildAction(
1912 validColumnNames, upcastNodeOnHeap, typeid(std::shared_ptr<ActionResultType>), typeid(ActionTag), rOnHeap,
1913 tree, nSlots, fCustomColumns, fDataSource, jittedActionOnHeap, fLoopManager->GetID());
1914 fLoopManager->Book(jittedActionOnHeap->get());
1915 fLoopManager->ToJit(toJit);
1916 return MakeResultPtr(r, *fLoopManager, *jittedActionOnHeap);
1917 }
1918
1919 template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
1920 typename std::enable_if<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>::type
1921 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
1922 {
1926
1927 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
1928 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
1929 std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::Slot>::value, ArgTypes_t>::type;
1930 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
1931 std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::SlotAndEntry>::value, ColTypesTmp_t>::type;
1932
1933 constexpr auto nColumns = ColTypes_t::list_size;
1934
1935 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1936
1937 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
1938
1940 RDFInternal::RBookedCustomColumns newCols(newColumns);
1941 auto newColumn = std::make_shared<NewCol_t>(fLoopManager, name, std::forward<F>(expression), validColumnNames,
1942 fLoopManager->GetNSlots(), newCols);
1943
1944 // Declare return type to the interpreter, for future use by jitted actions
1945 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
1946 if (retTypeName.empty()) {
1947 // The type is not known to the interpreter.
1948 // Forward-declare it as void + helpful comment, so that if this Define'd quantity is
1949 // ever used by jitted code users will have some way to know what went wrong
1950 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
1951 retTypeName = "void /* The type of column \"" + std::string(name) + "\" (" + demangledType +
1952 ") is not known to the interpreter. */";
1953 }
1954 const auto retTypeDeclaration = "namespace __tdf" + std::to_string(fLoopManager->GetID()) +
1955 " { " + +" using " + std::string(name) + std::to_string(newColumn->GetID()) +
1956 "_type = " + retTypeName + "; }";
1957 gInterpreter->Declare(retTypeDeclaration.c_str());
1958
1959 fLoopManager->RegisterCustomColumn(newColumn.get());
1960 newCols.AddName(name);
1961 newCols.AddColumn(newColumn, name);
1962
1963 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
1964
1965 return newInterface;
1966 }
1967
1968 // This overload is chosen when the callable passed to Define or DefineSlot returns void.
1969 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
1970 // this way compilation of `Define` has no way to continue after throwing the error.
1971 template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
1972 bool IsFStringConv = std::is_convertible<F, std::string>::value,
1973 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
1974 typename std::enable_if<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>::type
1976 {
1977 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
1978 "Error in `Define`: type returned by expression is not default-constructible");
1979 return *this; // never reached
1980 }
1981
1982 ////////////////////////////////////////////////////////////////////////////
1983 /// \brief Implementation of snapshot
1984 /// \param[in] treename The name of the TTree
1985 /// \param[in] filename The name of the TFile
1986 /// \param[in] columnList The list of names of the branches to be written
1987 /// The implementation exploits Foreach. The association of the addresses to
1988 /// the branches takes place at the first event. This is possible because
1989 /// since there are no copies, the address of the value passed by reference
1990 /// is the address pointing to the storage of the read/created object in/by
1991 /// the TTreeReaderValue/TemporaryBranch
1992 template <typename... ColumnTypes>
1994 const ColumnNames_t &columnList, const RSnapshotOptions &options)
1995 {
1996 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnList.size());
1997
1998 const auto validCols = GetValidatedColumnNames(columnList.size(), columnList);
1999
2002
2003 const std::string fullTreename(treename);
2004 // split name into directory and treename if needed
2005 const auto lastSlash = treename.rfind('/');
2006 std::string_view dirname = "";
2007 if (std::string_view::npos != lastSlash) {
2008 dirname = treename.substr(0, lastSlash);
2009 treename = treename.substr(lastSlash + 1, treename.size());
2010 }
2011
2012 // add action node to functional graph and run event loop
2013 std::unique_ptr<RDFInternal::RActionBase> actionPtr;
2015 // single-thread snapshot
2016 using Helper_t = RDFInternal::SnapshotHelper<ColumnTypes...>;
2018 actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, validCols, columnList, options), validCols,
2019 fProxiedPtr, newColumns));
2020 } else {
2021 // multi-thread snapshot
2022 using Helper_t = RDFInternal::SnapshotHelperMT<ColumnTypes...>;
2024 actionPtr.reset(new Action_t(
2025 Helper_t(fLoopManager->GetNSlots(), filename, dirname, treename, validCols, columnList, options), validCols,
2026 fProxiedPtr, newColumns));
2027 }
2028
2029 fLoopManager->Book(actionPtr.get());
2030
2031 return RDFInternal::CreateSnaphotRDF(validCols, fullTreename, std::string(filename), options.fLazy, *fLoopManager, std::move(actionPtr));
2032 }
2033
2034 ////////////////////////////////////////////////////////////////////////////
2035 /// \brief Implementation of cache
2036 template <typename... BranchTypes, std::size_t... S>
2038 {
2039 // Check at compile time that the columns types are copy constructible
2040 constexpr bool areCopyConstructible =
2041 RDFInternal::TEvalAnd<std::is_copy_constructible<BranchTypes>::value...>::value;
2042 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
2043
2044 // We share bits and pieces with snapshot. De facto this is a snapshot
2045 // in memory!
2046 RDFInternal::CheckTypesAndPars(sizeof...(BranchTypes), columnList.size());
2047
2048 auto colHolders = std::make_tuple(Take<BranchTypes>(columnList[S])...);
2049 auto ds = std::make_unique<RLazyDS<BranchTypes...>>(std::make_pair(columnList[S], std::get<S>(colHolders))...);
2050
2051 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnList));
2052
2053 (void)s; // Prevents unused warning
2054
2055 return cachedRDF;
2056 }
2057
2058protected:
2059 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm, RDFInternal::RBookedCustomColumns columns,
2060 RDataSource *ds)
2061 : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fCustomColumns(columns)
2062 {
2063 }
2064
2066
2067 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
2068
2069 /// Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,
2070 /// which is expensive in terms of runtime, is called at most once.
2071 ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
2072 {
2074 fDataSource);
2075 }
2076
2077 template <typename... ColumnTypes, std::size_t... S>
2080 {
2081 return fDataSource
2082 ? RDFInternal::AddDSColumns(*fLoopManager, validCols, fCustomColumns, *fDataSource,
2086 }
2087};
2088
2089} // end NS RDF
2090
2091} // namespace ROOT
2092
2093#endif // ROOT_RDF_INTERFACE
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
unsigned long long ULong64_t
Definition: RtypesCore.h:70
const Int_t kError
Definition: TError.h:39
int type
Definition: TGX11.cxx:120
#define gInterpreter
Definition: TInterpreter.h:538
typedef void((*Func_t)())
unsigned int GetID() const
Return the unique identifier of this RCustomColumnBase.
The head node of a RDF computation graph.
const std::map< std::string, std::string > & GetAliasMap() const
void ToJit(const std::string &s)
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void RegisterCustomColumn(RCustomColumnBase *column)
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void AddColumnAlias(const std::string &alias, const std::string &colName)
RDataSource * GetDataSource() const
unsigned int GetNSlots() const
void Book(RDFInternal::RActionBase *actionPtr)
Helper class that provides the operation graph nodes.
An action node in a RDF computation graph.
Definition: RAction.hxx:209
Encapsulates the columns defined by the user.
ColumnNames_t GetNames() const
Returns the list of the names of the defined columns.
void AddColumn(const std::shared_ptr< RDFDetail::RCustomColumnBase > &column, const std::string_view &name)
Internally it recreates the map with the new column, and swaps with the old one.
void AddName(const std::string_view &name)
Internally it recreates the map with the new column name, and swaps with the old one.
RCustomColumnBasePtrMap_t GetColumns() const
Returns the list of the pointers to the defined columns.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
The public interface to the RDataFrame federation of classes.
Definition: RInterface.hxx:87
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
Definition: RInterface.hxx:964
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
Definition: RInterface.hxx:982
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:555
RLoopManager * GetLoopManager() const
RResultPtr<::TGraph > Graph(std::string_view v1Name="", std::string_view v2Name="")
Fill and return a graph (lazy action)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot.
Definition: RInterface.hxx:328
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action)
RInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a RInterface<RLoopManager>
Definition: RInterface.hxx:134
RResultPtr< T > Fill(T &&model, const ColumnNames_t &bl)
Return an object of type T on which T::Fill will be called once per event (lazy action)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const int nColumns=-1)
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action)
Definition: RInterface.hxx:755
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:611
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column.
Definition: RInterface.hxx:299
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
Definition: RInterface.hxx:113
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action)
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,...
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
RDFInternal::RBookedCustomColumns fCustomColumns
Contains the custom columns defined up to this node.
Definition: RInterface.hxx:116
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
Definition: RInterface.hxx:236
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:577
std::enable_if<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view, F, const ColumnNames_t &)
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow to refer to a column with a different name.
Definition: RInterface.hxx:411
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:599
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
Definition: RInterface.hxx:664
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", const int &nRows=5)
Provides a representation of the columns in the dataset.
RLoopManager * fLoopManager
Definition: RInterface.hxx:111
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
Definition: RInterface.hxx:94
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action)
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action)
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action)
Definition: RInterface.hxx:830
RResultPtr< T > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action)
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Creates a custom column.
Definition: RInterface.hxx:375
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
Definition: RInterface.hxx:109
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action)
Definition: RInterface.hxx:920
RDFInternal::RBookedCustomColumns CheckAndFillDSColumns(ColumnNames_t validCols, std::index_sequence< S... >, TTraits::TypeList< ColumnTypes... >)
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
Implementation of snapshot.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
Definition: RInterface.hxx:938
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
Definition: RInterface.hxx:715
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default)
Definition: RInterface.hxx:862
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
Definition: RInterface.hxx:676
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action)
RResultPtr< typename Helper::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:470
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:488
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot and the current entry.
Definition: RInterface.hxx:358
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action)
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:799
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action)
Definition: RInterface.hxx:730
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:252
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
std::enable_if< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Definition: RInterface.hxx:195
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:818
void CheckIMTDisabled(std::string_view callerName)
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, RDFInternal::RBookedCustomColumns columns, RDataSource *ds)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action)
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... > s)
Implementation of cache.
RDFDetail::ColumnNames_t ColumnNames_t
Definition: RInterface.hxx:89
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
Definition: RInterface.hxx:220
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end)
Definition: RInterface.hxx:693
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action)
Definition: RInterface.hxx:895
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action)
A RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:72
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:41
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
#define F(x, y, z)
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create a RResultPtr and set its pointer to the corresponding RAction This overload is invoked by non-...
Definition: RResultPtr.hxx:333
ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
ColumnNames_t ConvertRegexToColumns(const ROOT::RDF::RNode &node, std::string_view columnNameRegexp, std::string_view callerName)
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:82
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::string JitBuildAction(const ColumnNames_t &bl, void *prevNode, const std::type_info &art, const std::type_info &at, void *rOnHeap, TTree *tree, const unsigned int nSlots, const RDFInternal::RBookedCustomColumns &customCols, RDataSource *ds, std::shared_ptr< RJittedAction > *jittedActionOnHeap, unsigned int namespaceID)
HeadNode_t CreateSnaphotRDF(const ColumnNames_t &validCols, const std::string &fullTreeName, const std::string &fileName, bool isLazy, RLoopManager &loopManager, std::unique_ptr< RDFInternal::RActionBase > actionPtr)
bool IsInternalColumn(std::string_view colName)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &validCustomColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::string ColumnName2ColumnTypeName(const std::string &colName, unsigned int namespaceID, TTree *tree, RDataSource *ds, bool isCustomColumn, bool vector2tvec, unsigned int customColID)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:186
void BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const std::shared_ptr< RJittedCustomColumn > &jittedCustomColumn, const RDFInternal::RBookedCustomColumns &customCols, const ColumnNames_t &branches)
void BookFilterJit(RJittedFilter *jittedFilter, void *prevNodeOnHeap, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const RDFInternal::RBookedCustomColumns &customCols, TTree *tree, RDataSource *ds, unsigned int namespaceID)
void CheckCustomColumn(std::string_view definedCol, TTree *treePtr, const ColumnNames_t &customCols, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &dataSourceColumns)
double T(double x)
Definition: ChebyshevPol.h:34
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
ROOT type_traits extensions.
Definition: TypeTraits.hxx:23
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:607
ROOT::Detail::RDF::ColumnNames_t ColumnNames_t
Definition: RDataFrame.cxx:790
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1193
RooArgSet S(const RooAbsArg &v1)
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
static constexpr double s
Print a TSeq at the prompt:
Definition: TDatime.h:115
std::string printValue(const TDatime *val)
Print a TDatime at the prompt.
Definition: TDatime.cxx:514
Definition: graph.py:1
std::unique_ptr< T > make_unique(Args &&... args)
Definition: RMakeUnique.hxx:26
make_index_sequence< sizeof...(_Tp)> index_sequence_for
basic_string_view< char > string_view
Definition: RStringView.hxx:35
make_integer_sequence< size_t, _Np > make_index_sequence
Definition: tree.py:1
A collection of options to steer the creation of the dataset on file.
bool fLazy
Delay the snapshot of the dataset.
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:27
A struct which stores the parameters of a TH2D.
Definition: HistoModels.hxx:45
A struct which stores the parameters of a TH3D.
Definition: HistoModels.hxx:70
A struct which stores the parameters of a TProfile.
Definition: HistoModels.hxx:99
A struct which stores the parameters of a TProfile2D.
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:27