Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/RDataSource.hxx"
20#include "ROOT/RDF/RDefine.hxx"
22#include "ROOT/RDF/RFilter.hxx"
27#include "ROOT/RDF/RRange.hxx"
29#include "ROOT/RDF/Utils.hxx"
32#include "ROOT/RResultPtr.hxx"
34#include <string_view>
35#include "ROOT/RVec.hxx"
36#include "ROOT/TypeTraits.hxx"
37#include "RtypesCore.h" // for ULong64_t
38#include "TDirectory.h"
39#include "TH1.h" // For Histo actions
40#include "TH2.h" // For Histo actions
41#include "TH3.h" // For Histo actions
42#include "THn.h"
43#include "TProfile.h"
44#include "TProfile2D.h"
45#include "TStatistic.h"
46
47#include <algorithm>
48#include <cstddef>
49#include <initializer_list>
50#include <iterator> // std::back_inserter
51#include <limits>
52#include <memory>
53#include <set>
54#include <sstream>
55#include <stdexcept>
56#include <string>
57#include <type_traits> // is_same, enable_if
58#include <typeinfo>
59#include <unordered_set>
60#include <utility> // std::index_sequence
61#include <vector>
62
63class TGraph;
64
65// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
66namespace ROOT {
69void EnableImplicitMT(UInt_t numthreads);
70class RDataFrame;
71} // namespace ROOT
72namespace cling {
73std::string printValue(ROOT::RDataFrame *tdf);
74}
75
76namespace ROOT {
77namespace RDF {
80namespace TTraits = ROOT::TypeTraits;
81
82template <typename Proxied, typename DataSource>
83class RInterface;
84
85using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
86} // namespace RDF
87
88namespace Internal {
89namespace RDF {
91void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
92void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end);
95std::string GetDataSourceLabel(const ROOT::RDF::RNode &node);
96} // namespace RDF
97} // namespace Internal
98
99namespace RDF {
100
101// clang-format off
102/**
103 * \class ROOT::RDF::RInterface
104 * \ingroup dataframe
105 * \brief The public interface to the RDataFrame federation of classes.
106 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
107 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
108 *
109 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
110 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
111 */
112// clang-format on
113template <typename Proxied, typename DataSource = void>
115 using DS_t = DataSource;
119 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
121
122 template <typename T, typename W>
123 friend class RInterface;
124
126 friend void RDFInternal::ChangeEmptyEntryRange(const RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
127 friend void RDFInternal::ChangeBeginAndEndEntries(const RNode &node, Long64_t start, Long64_t end);
129 friend std::string ROOT::Internal::RDF::GetDataSourceLabel(const RNode &node);
130 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
131
132public:
133 ////////////////////////////////////////////////////////////////////////////
134 /// \brief Copy-assignment operator for RInterface.
135 RInterface &operator=(const RInterface &) = default;
136
137 ////////////////////////////////////////////////////////////////////////////
138 /// \brief Copy-ctor for RInterface.
139 RInterface(const RInterface &) = default;
140
141 ////////////////////////////////////////////////////////////////////////////
142 /// \brief Move-ctor for RInterface.
143 RInterface(RInterface &&) = default;
144
145 ////////////////////////////////////////////////////////////////////////////
146 /// \brief Move-assignment operator for RInterface.
148
149 ////////////////////////////////////////////////////////////////////////////
150 /// \brief Build a RInterface from a RLoopManager.
151 /// This constructor is only available for RInterface<RLoopManager>.
152 template <typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>>
153 RInterface(const std::shared_ptr<RLoopManager> &proxied) : RInterfaceBase(proxied), fProxiedPtr(proxied)
154 {
155 }
156
157 ////////////////////////////////////////////////////////////////////////////
158 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
159 /// Different RDataFrame methods return different C++ types. All nodes, however,
160 /// can be cast to this common type at the cost of a small performance penalty.
161 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
162 /// around via (non-template, C++11) helper functions.
163 /// Example usage:
164 /// ~~~{.cpp}
165 /// // a function that conditionally adds a Range to a RDataFrame node.
166 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
167 /// {
168 /// return mustAddRange ? df.Range(1) : df;
169 /// }
170 /// // use as :
171 /// ROOT::RDataFrame df(10);
172 /// auto maybeRanged = MaybeAddRange(df, true);
173 /// ~~~
174 /// Note that it is not a problem to pass RNode's by value.
175 operator RNode() const
176 {
177 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister);
178 }
179
180 ////////////////////////////////////////////////////////////////////////////
181 /// \brief Append a filter to the call graph.
182 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
183 /// signalling whether the event has passed the selection (true) or not (false).
184 /// \param[in] columns Names of the columns/branches in input to the filter function.
185 /// \param[in] name Optional name of this filter. See `Report`.
186 /// \return the filter node of the computation graph.
187 ///
188 /// Append a filter node at the point of the call graph corresponding to the
189 /// object this method is called on.
190 /// The callable `f` should not have side-effects (e.g. modification of an
191 /// external or static variable) to ensure correct results when implicit
192 /// multi-threading is active.
193 ///
194 /// RDataFrame only evaluates filters when necessary: if multiple filters
195 /// are chained one after another, they are executed in order and the first
196 /// one returning false causes the event to be discarded.
197 /// Even if multiple actions or transformations depend on the same filter,
198 /// it is executed once per entry. If its result is requested more than
199 /// once, the cached result is served.
200 ///
201 /// ### Example usage:
202 /// ~~~{.cpp}
203 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
204 /// auto filtered = df.Filter(myCut, {"x", "y"});
205 ///
206 /// // String: it must contain valid C++ except that column names can be used instead of variable names
207 /// auto filtered = df.Filter("x*y > 0");
208 /// ~~~
209 ///
210 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
211 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
212 /// ~~~{.cpp}
213 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
214 /// ~~~
215 /// but instead this will:
216 /// ~~~{.cpp}
217 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
218 /// ~~~
// NOTE(review): listing lines 220 and 229 are absent from this extract, so the
// return type of this overload and the definition of `F_t` used below are not
// visible here. Restore them from the upstream header before compiling.
//
// The enable_if guard removes this overload for any F convertible to
// std::string, leaving string arguments to the string-based Filter overload.
219 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
221 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
222 {
// Validate the callable first (presumably that it returns bool, as the
// documentation of this method requires — confirm in CheckFilter).
223 RDFInternal::CheckFilter(f);
// The callable's argument type list determines how many column names are needed.
224 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
225 constexpr auto nColumns = ColTypes_t::list_size;
226 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
227 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
228
230
// The callable is moved into the new filter node, which is constructed with the
// current node (fProxiedPtr); the node is then wrapped in a new RInterface.
231 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name);
232 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
233 }
234
235 ////////////////////////////////////////////////////////////////////////////
236 /// \brief Append a filter to the call graph.
237 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
238 /// signalling whether the event has passed the selection (true) or not (false).
239 /// \param[in] name Optional name of this filter. See `Report`.
240 /// \return the filter node of the computation graph.
241 ///
242 /// Refer to the first overload of this method for the full documentation.
// NOTE(review): listing line 244 (return type, name and parameter list of this
// overload) is absent from this extract; `f` and `name` used below are declared there.
243 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
245 {
246 // The SFINAE guard exists so that a call with two strings selects the overload taking
247 // (expression, name) strings rather than this template method.
// Forward to the main overload with an empty column list.
// NOTE(review): `f` is passed as an lvalue here and therefore copied by the callee's
// by-value parameter; if `f` is a by-value parameter of this overload (signature not
// visible), std::move(f) would avoid that copy.
248 return Filter(f, {}, name);
249 }
250
251 ////////////////////////////////////////////////////////////////////////////
252 /// \brief Append a filter to the call graph.
253 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
254 /// signalling whether the event has passed the selection (true) or not (false).
255 /// \param[in] columns Names of the columns/branches in input to the filter function.
256 /// \return the filter node of the computation graph.
257 ///
258 /// Refer to the first overload of this method for the full documentation.
259 template <typename F>
260 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
261 {
262 return Filter(f, ColumnNames_t{columns});
263 }
264
265 ////////////////////////////////////////////////////////////////////////////
266 /// \brief Append a filter to the call graph.
267 /// \param[in] expression The filter expression in C++
268 /// \param[in] name Optional name of this filter. See `Report`.
269 /// \return the filter node of the computation graph.
270 ///
271 /// The expression is just-in-time compiled and used to filter entries. It must
272 /// be valid C++ syntax in which variable names are substituted with the names
273 /// of branches/columns.
274 ///
275 /// ### Example usage:
276 /// ~~~{.cpp}
277 /// auto filtered_df = df.Filter("myCollection.size() > 3");
278 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
279 /// ~~~
280 ///
281 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
282 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
283 /// ~~~{.cpp}
284 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
285 /// ~~~
286 /// but instead this will:
287 /// ~~~{.cpp}
288 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
289 /// ~~~
// String-expression overload: books a just-in-time compiled filter for `expression`.
// NOTE(review): listing line 300 (presumably the return statement) is absent from this
// extract, so how `jittedFilter` is turned into the returned RInterface is not visible here.
290 RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "")
291 {
292 // deleted by the jitted call to JitFilterHelper
293 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
// element_type of the pointed-to smart pointer, i.e. the node type produced by UpcastNode.
294 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
// NOTE(review): `upcastInterface` is not referenced by any line visible in this extract —
// confirm whether the elided line uses it or whether it is dead code.
295 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister);
// Book the jitted filter, passing the upcast node, the expression, the filter name, the
// known branch names, the column register, the tree and the data source.
296 const auto jittedFilter =
297 RDFInternal::BookFilterJit(upcastNodeOnHeap, name, expression, fLoopManager->GetBranchNames(), fColRegister,
298 fLoopManager->GetTree(), fDataSource);
299
301 }
302
303 ////////////////////////////////////////////////////////////////////////////
304 /// \brief Discard entries with missing values
305 /// \param[in] column Column name whose entries with missing values should be discarded
306 /// \return The filter node of the computation graph
307 ///
308 /// This operation is useful in case an entry of the dataset is incomplete,
309 /// i.e. if one or more of the columns do not have valid values. If the value
310 /// of the input column is missing for an entry, the entire entry will be
311 /// discarded from the rest of this branch of the computation graph.
312 ///
313 /// Use cases include:
314 /// * When processing multiple files, one or more of them is missing a column
315 /// * In horizontal joining with entry matching, a certain dataset has no
316 /// match for the current entry.
317 ///
318 /// ### Example usage:
319 ///
320 /// \code{.py}
321 /// # Assume a dataset with columns [idx, x] matching another dataset with
322 /// # columns [idx, y]. For idx == 42, the right-hand dataset has no match
323 /// df = ROOT.RDataFrame(dataset)
324 /// df_nomissing = df.FilterAvailable("idx").Define("z", "x + y")
325 /// colz = df_nomissing.Take[int]("z")
326 /// \endcode
327 ///
328 /// \code{.cpp}
329 /// // Assume a dataset with columns [idx, x] matching another dataset with
330 /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match
331 /// ROOT::RDataFrame df{dataset};
332 /// auto df_nomissing = df.FilterAvailable("idx")
333 /// .Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
334 /// auto colz = df_nomissing.Take<int>("z");
335 /// \endcode
336 ///
337 /// \note See FilterMissing() if you want to keep only the entries with
338 /// missing values instead.
// NOTE(review): listing lines 339 (the signature), 342 and 347 are absent from this
// extract; the declarations of `column` and `F_t` are therefore not visible here.
340 {
// NOTE(review): if `column` is a std::string_view (signature not visible), data() is not
// guaranteed to be NUL-terminated, so building a std::string from the raw pointer can read
// past the view; std::string(column) would respect the view's length.
341 const auto columns = ColumnNames_t{column.data()};
343 // For now disable this functionality in case of an empty data source and
344 // the column name was not defined previously.
// The return value is discarded: the call is made only for its validation effect
// (presumably it throws for an unknown column — confirm).
345 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
346 GetValidatedColumnNames(1, columns);
// discardEntry == true is what distinguishes this node from the one built by FilterMissing.
348 auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ true, fProxiedPtr, fColRegister, columns);
349 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
350 }
351
352 ////////////////////////////////////////////////////////////////////////////
353 /// \brief Keep only the entries that have missing values.
354 /// \param[in] column Column name whose entries with missing values should be kept
355 /// \return The filter node of the computation graph
356 ///
357 /// This operation is useful in case an entry of the dataset is incomplete,
358 /// i.e. if one or more of the columns do not have valid values. It only
359 /// keeps the entries for which the value of the input column is missing.
360 ///
361 /// Use cases include:
362 /// * When processing multiple files, one or more of them is missing a column
363 /// * In horizontal joining with entry matching, a certain dataset has no
364 /// match for the current entry.
365 ///
366 /// ### Example usage:
367 ///
368 /// \code{.py}
369 /// # Assume a dataset made of two files vertically chained together, one has
370 /// # column "x" and the other has column "y"
371 /// df = ROOT.RDataFrame(dataset)
372 /// df_valid_col_x = df.FilterMissing("y")
373 /// df_valid_col_y = df.FilterMissing("x")
374 /// display_x = df_valid_col_x.Display(("x",))
375 /// display_y = df_valid_col_y.Display(("y",))
376 /// \endcode
377 ///
378 /// \code{.cpp}
379 /// // Assume a dataset made of two files vertically chained together, one has
380 /// // column "x" and the other has column "y"
381 /// ROOT::RDataFrame df{dataset};
382 /// auto df_valid_col_x = df.FilterMissing("y");
383 /// auto df_valid_col_y = df.FilterMissing("x");
384 /// auto display_x = df_valid_col_x.Display<int>({"x"});
385 /// auto display_y = df_valid_col_y.Display<int>({"y"});
386 /// \endcode
387 ///
388 /// \note See FilterAvailable() if you want to discard the entries in case
389 /// there is a missing value instead.
// NOTE(review): listing lines 390 (the signature), 393 and 398 are absent from this
// extract; the declarations of `column` and `F_t` are therefore not visible here.
391 {
// NOTE(review): if `column` is a std::string_view (signature not visible), data() is not
// guaranteed to be NUL-terminated, so building a std::string from the raw pointer can read
// past the view; std::string(column) would respect the view's length.
392 const auto columns = ColumnNames_t{column.data()};
394 // For now disable this functionality in case of an empty data source and
395 // the column name was not defined previously.
// The return value is discarded: the call is made only for its validation effect
// (presumably it throws for an unknown column — confirm).
396 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
397 GetValidatedColumnNames(1, columns);
// discardEntry == false is what distinguishes this node from the one built by FilterAvailable.
399 auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ false, fProxiedPtr, fColRegister, columns);
400 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
401 }
402
403 // clang-format off
404 ////////////////////////////////////////////////////////////////////////////
405 /// \brief Define a new column.
406 /// \param[in] name The name of the defined column.
407 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
408 /// \param[in] columns Names of the columns/branches in input to the producer function.
409 /// \return the first node of the computation graph for which the new quantity is defined.
410 ///
411 /// Define a column that will be visible from all subsequent nodes
412 /// of the functional chain. The `expression` is only evaluated for entries that pass
413 /// all the preceding filters.
414 /// A new variable is created called `name`, accessible as if it was contained
415 /// in the dataset from subsequent transformations/actions.
416 ///
417 /// Use cases include:
418 /// * caching the results of complex calculations for easy and efficient multiple access
419 /// * extraction of quantities of interest from complex objects
420 ///
421 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
422 ///
423 /// ### Example usage:
424 /// ~~~{.cpp}
425 /// // assuming a function with signature:
426 /// double myComplexCalculation(const RVec<float> &muon_pts);
427 /// // we can pass it directly to Define
428 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
429 /// // alternatively, we can pass the body of the function as a string, as in Filter:
430 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
431 /// ~~~
432 ///
433 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
434 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
435 /// ~~~{.cpp}
436 /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
437 /// ~~~
438 /// but instead this will:
439 /// ~~~{.cpp}
440 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
441 /// ~~~
442 template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
443 RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
444 {
445 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Define");
446 }
447 // clang-format on
448
449 // clang-format off
450 ////////////////////////////////////////////////////////////////////////////
451 /// \brief Define a new column with a value dependent on the processing slot.
452 /// \param[in] name The name of the defined column.
453 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
454 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
455 /// \return the first node of the computation graph for which the new quantity is defined.
456 ///
457 /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
458 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
459 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
460 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
461 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
462 ///
463 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
464 /// ~~~{.cpp}
465 /// int function(unsigned int, double, double);
466 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
467 /// df.DefineSlot("x", function, {"column1", "column2"})
468 /// ~~~
469 ///
470 /// See Define() for more information.
471 template <typename F>
472 RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
473 {
474 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "DefineSlot");
475 }
476 // clang-format on
477
478 // clang-format off
479 ////////////////////////////////////////////////////////////////////////////
480 /// \brief Define a new column with a value dependent on the processing slot and the current entry.
481 /// \param[in] name The name of the defined column.
482 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
483 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
484 /// \return the first node of the computation graph for which the new quantity is defined.
485 ///
486 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
487 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
488 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
489 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
490 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
491 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
492 ///
493 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
494 /// ~~~{.cpp}
495 /// int function(unsigned int, ULong64_t, double, double);
496 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
497 /// DefineSlotEntry("x", function, {"column1", "column2"})
498 /// ~~~
499 ///
500 /// See Define() for more information.
501 template <typename F>
502 RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
503 {
504 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
505 "DefineSlotEntry");
506 }
507 // clang-format on
508
509 ////////////////////////////////////////////////////////////////////////////
510 /// \brief Define a new column.
511 /// \param[in] name The name of the defined column.
512 /// \param[in] expression An expression in C++ which represents the defined value
513 /// \return the first node of the computation graph for which the new quantity is defined.
514 ///
515 /// The expression is just-in-time compiled and used to produce the column entries.
516 /// It must be valid C++ syntax in which variable names are substituted with the names
517 /// of branches/columns.
518 ///
519 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
520 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
521 /// ~~~{.cpp}
522 /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
523 /// ~~~
524 /// but instead this will:
525 /// ~~~{.cpp}
526 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
527 /// ~~~
528 ///
529 /// Refer to the first overload of this method for the full documentation.
// String-expression Define: books a just-in-time compiled column and returns a new
// interface on the same node with that column added to a new column register.
// NOTE(review): listing lines 533, 535, 536 and 542 are absent from this extract. The
// pre-jitting checks announced by the comment below and the declaration of `newCols`
// are therefore not visible here; `where` is presumably consumed by those elided checks.
530 RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression)
531 {
532 constexpr auto where = "Define";
534 // these checks must be done before jitting lest we throw exceptions in jitted code
537
// Heap-allocated handle to the upcast node, handed to BookDefineJit (who releases it
// is not visible in this block).
538 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
539 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
540 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
541
// Register the jitted column in `newCols`, then build the returned interface around
// the same proxied node with that register.
543 newCols.AddDefine(std::move(jittedDefine));
544
545 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
546
547 return newInterface;
548 }
549
550 ////////////////////////////////////////////////////////////////////////////
551 /// \brief Overwrite the value and/or type of an existing column.
552 /// \param[in] name The name of the column to redefine.
553 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
554 /// \param[in] columns Names of the columns/branches in input to the expression.
555 /// \return the first node of the computation graph for which the quantity is redefined.
556 ///
557 /// The old value of the column can be used as an input for the expression.
558 ///
559 /// An exception is thrown in case the column to redefine does not already exist.
560 /// See Define() for more information.
561 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
562 RInterface<Proxied, DS_t> Redefine(std::string_view name, F expression, const ColumnNames_t &columns = {})
563 {
564 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Redefine");
565 }
566
567 // clang-format off
568 ////////////////////////////////////////////////////////////////////////////
569 /// \brief Overwrite the value and/or type of an existing column.
570 /// \param[in] name The name of the column to redefine.
571 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
572 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
573 /// \return the first node of the computation graph for which the new quantity is defined.
574 ///
575 /// The old value of the column can be used as an input for the expression.
576 /// An exception is thrown in case the column to redefine does not already exist.
577 ///
578 /// See DefineSlot() for more information.
579 // clang-format on
580 template <typename F>
581 RInterface<Proxied, DS_t> RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
582 {
583 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "RedefineSlot");
584 }
585
586 // clang-format off
587 ////////////////////////////////////////////////////////////////////////////
588 /// \brief Overwrite the value and/or type of an existing column.
589 /// \param[in] name The name of the column to redefine.
590 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
591 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
592 /// \return the first node of the computation graph for which the new quantity is defined.
593 ///
594 /// The old value of the column can be used as an input for the expression.
595 /// An exception is thrown in case the column to re-define does not already exist.
596 ///
597 /// See DefineSlotEntry() for more information.
598 // clang-format on
599 template <typename F>
600 RInterface<Proxied, DS_t> RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
601 {
602 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
603 "RedefineSlotEntry");
604 }
605
606 ////////////////////////////////////////////////////////////////////////////
607 /// \brief Overwrite the value and/or type of an existing column.
608 /// \param[in] name The name of the column to redefine.
609 /// \param[in] expression An expression in C++ which represents the defined value
610 /// \return the first node of the computation graph for which the new quantity is defined.
611 ///
612 /// The expression is just-in-time compiled and used to produce the column entries.
613 /// It must be valid C++ syntax in which variable names are substituted with the names
614 /// of branches/columns.
615 ///
616 /// The old value of the column can be used as an input for the expression.
617 /// An exception is thrown in case the column to re-define does not already exist.
618 ///
619 /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
// String-expression Redefine: books a just-in-time compiled column and returns a new
// interface on the same node with that column added to a new column register.
// NOTE(review): listing lines 623-626 and 632 are absent from this extract. Any checks
// performed before jitting and the declaration of `newCols` are therefore not visible
// here; `where` is presumably consumed by those elided lines.
620 RInterface<Proxied, DS_t> Redefine(std::string_view name, std::string_view expression)
621 {
622 constexpr auto where = "Redefine";
627
// Heap-allocated handle to the upcast node, handed to BookDefineJit (who releases it
// is not visible in this block).
628 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
629 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
630 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
631
// Register the jitted column in `newCols`, then build the returned interface around
// the same proxied node with that register.
633 newCols.AddDefine(std::move(jittedDefine));
634
635 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
636
637 return newInterface;
638 }
639
640 ////////////////////////////////////////////////////////////////////////////
641 /// \brief In case the value in the given column is missing, provide a default value
642 /// \tparam T The type of the column
643 /// \param[in] column Column name where missing values should be replaced by the given default value
644 /// \param[in] defaultValue Value to provide instead of a missing value
645 /// \return The node of the graph that will provide a default value
646 ///
647 /// This operation is useful in case an entry of the dataset is incomplete,
648 /// i.e. if one or more of the columns do not have valid values. It does not
649 /// modify the values of the column, but in case any entry is missing, it
650 /// will provide the default value to downstream nodes instead.
651 ///
652 /// Use cases include:
653 /// * When processing multiple files, one or more of them is missing a column
654 /// * In horizontal joining with entry matching, a certain dataset has no
655 /// match for the current entry.
656 ///
657 /// ### Example usage:
658 ///
659 /// \code{.cpp}
660 /// // Assume a dataset with columns [idx, x] matching another dataset with
661 /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match
662 /// ROOT::RDataFrame df{dataset};
663 /// auto df_default = df.DefaultValueFor("y", 33)
664 /// .Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
665 /// auto colz = df_default.Take<int>("z");
666 /// \endcode
667 ///
668 /// \code{.py}
669 /// df = ROOT.RDataFrame(dataset)
670 /// df_default = df.DefaultValueFor("y", 33).Define("z", "x + y")
671 /// colz = df_default.Take[int]("z")
672 /// \endcode
// Registers a column object of type RDefaultValueFor<T> for `column` and returns a new
// interface on the same node with that column added to a new column register.
// NOTE(review): listing lines 677, 682 and 698 are absent from this extract. The tail of
// the CheckForDefinition call and the declaration of `newCols` are therefore not visible here.
673 template <typename T>
674 RInterface<Proxied, DS_t> DefaultValueFor(std::string_view column, const T &defaultValue)
675 {
676 constexpr auto where{"DefaultValueFor"};
678 // For now disable this functionality in case of an empty data source and
679 // the column name was not defined previously.
680 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
681 RDFInternal::CheckForDefinition(where, column, fColRegister, fLoopManager->GetBranchNames(),
// NOTE(review): std::string_view::data() is not guaranteed to be NUL-terminated, so
// building a std::string from the raw pointer can read past the view (e.g. when `column`
// is a substring); std::string(column) would respect the view's length.
683 const auto validColumnNames = ColumnNames_t{column.data()};
684 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>{});
685
686 // Declare return type to the interpreter, for future use by jitted actions
687 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(T));
688 if (retTypeName.empty()) {
689 // The type is not known to the interpreter.
690 // We must not error out here; a placeholder type name is stored instead (presumably so
// that the problem surfaces only if/when this column is used in jitted code — confirm).
691 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(T));
692 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
693 }
694
695 auto newColumn = std::make_shared<ROOT::Internal::RDF::RDefaultValueFor<T>>(
696 column, retTypeName, defaultValue, validColumnNames, fColRegister, *fLoopManager);
697
699 newCols.AddDefine(std::move(newColumn));
700
// NOTE(review): this local is RInterface<Proxied> (DataSource defaulted to void) while the
// declared return type is RInterface<Proxied, DS_t>; the string-based Define overload uses
// RInterface<Proxied, DS_t> for the same purpose. The two only coincide when DS_t is void.
701 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
702
703 return newInterface;
704 }
705
706 // clang-format off
707 ////////////////////////////////////////////////////////////////////////////
708 /// \brief Define a new column that is updated when the input sample changes.
709 /// \param[in] name The name of the defined column.
710 /// \param[in] expression A C++ callable that computes the new value of the defined column.
711 /// \return the first node of the computation graph for which the new quantity is defined.
712 ///
713 /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
714 /// where:
715 /// - `T` is the type of the defined column
716 /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
717 /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
718 /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
719 /// being processed (see the class docs for more information).
720 ///
721 /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
722 /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
723 /// starts rather than at every entry.
724 ///
725 /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often.
726 ///
727 /// ### Example usage:
728 /// ~~~{.cpp}
729 /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
730 /// df.DefinePerSample("weightbysample",
731 /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
732 /// { return id.Contains("sample1") ? 1.0f : 2.0f; });
733 /// ~~~
734 // clang-format on
735 // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
// Callable overload: validates `name`, resolves the interpreter-visible name of the callable's
// return type, wraps `expression` in an RDefinePerSample<F>, adds it to `newCols` and returns
// a new RInterface built from the same fProxiedPtr and loop manager.
// NOTE(review): the embedded numbering of this listing jumps over 741 and 754, so the
// CheckForRedefinition call is visibly truncated and the declaration of `newCols` is not
// shown — confirm against the full file.
736 template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
737 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, F expression)
738 {
739 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
740 RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(),
// NOTE(review): the remaining argument(s) of the call above are on a line missing from this listing.
742
743 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
744 if (retTypeName.empty()) {
745 // The type is not known to the interpreter.
746 // We must not error out here, but only if/when this column is used in jitted code:
// until then, record a placeholder name built from the demangled type.
747 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
748 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
749 }
750
// The callable was taken by value, so it can be moved into the new column object.
751 auto newColumn =
752 std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager);
753
// NOTE(review): `newCols` is declared on a line missing from this listing.
755 newCols.AddDefine(std::move(newColumn));
// NOTE(review): the declared return type is RInterface<Proxied, DS_t> while this local is
// RInterface<Proxied>; presumably equivalent through a default template argument — confirm.
756 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
757 return newInterface;
758 }
759
760 // clang-format off
761 ////////////////////////////////////////////////////////////////////////////
762 /// \brief Define a new column that is updated when the input sample changes.
763 /// \param[in] name The name of the defined column.
764 /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
765 /// \return the first node of the computation graph for which the new quantity is defined.
766 ///
767 /// The expression is just-in-time compiled and used to produce the column entries.
768 /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is
769 /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the
770 /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
771 ///
772 /// ### Example usage:
773 /// ~~~{.py}
774 /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root'])
775 /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f')
776 /// ~~~
777 ///
778 /// \note
779 /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
780 /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
781 /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
782 /// ~~~{.py}
783 /// ROOT.gInterpreter.Declare(
784 /// """
785 /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
786 /// return id.Contains("sample1") ? 1.0f : 2.0f;
787 /// }
788 /// """)
789 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
790 /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
791 /// ~~~
792 ///
793 /// \note
794 /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain
795 /// column names other than those mentioned above: the expression is evaluated once before the processing of the
796 /// sample even starts, so column values are not accessible.
797 // clang-format on
// String-expression overload: validates `name`, books a jitted per-sample define through
// BookDefinePerSampleJit, adds it to `newCols` and returns a new RInterface built from the
// same fProxiedPtr and loop manager.
// NOTE(review): the embedded numbering of this listing jumps over 803 and 809, so the
// CheckForRedefinition call is visibly truncated and the declaration of `newCols` is not
// shown — confirm against the full file.
798 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, std::string_view expression)
799 {
800 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
801 // these checks must be done before jitting lest we throw exceptions in jitted code
802 RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(),
// NOTE(review): the remaining argument(s) of the call above are on a line missing from this listing.
804
// The upcast node is passed to the jitting helper together with the expression.
805 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
806 auto jittedDefine =
807 RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, upcastNodeOnHeap);
808
// NOTE(review): `newCols` is declared on a line missing from this listing.
810 newCols.AddDefine(std::move(jittedDefine));
811
812 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
813
814 return newInterface;
815 }
816
817 /// \brief Register systematic variations for a single existing column using custom variation tags.
818 /// \param[in] colName name of the column for which varied values are provided.
819 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
820 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
821 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
822 /// \param[in] inputColumns the names of the columns to be passed to the callable.
823 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
824 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. colName is used if none is provided.
825 ///
826 /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to
827 /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for
828 /// results that depend on any varied quantity, a map/dictionary of varied results can be produced with
829 /// ROOT::RDF::Experimental::VariationsFor (see the example below).
830 ///
831 /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and
832 /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or
833 /// indirect dependencies of the column values on some registered variations). The keys will be a composition of
834 /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below.
835 ///
836 /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt.
837 /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):
838 /// ~~~{.cpp}
839 /// auto nominal_hx =
840 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, {"down", "up"})
841 /// .Filter("pt > k")
842 /// .Define("x", someFunc, {"pt"})
843 /// .Histo1D("x");
844 ///
845 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
846 /// hx["nominal"].Draw();
847 /// hx["pt:down"].Draw("SAME");
848 /// hx["pt:up"].Draw("SAME");
849 /// ~~~
850 /// RDataFrame computes all variations as part of a single loop over the data.
851 /// In particular, this means that I/O and computation of values shared
852 /// among variations only happen once for all variations. Thus, the event loop
853 /// run-time typically scales much better than linearly with the number of
854 /// variations.
855 ///
856 /// RDataFrame lazily computes the varied values required to produce the
857 /// outputs of \ref ROOT::RDF::Experimental::VariationsFor "VariationsFor()". If \ref
858 /// ROOT::RDF::Experimental::VariationsFor "VariationsFor()" was not called for a result, the computations are only
859 /// run for the nominal case.
860 ///
861 /// See other overloads for examples when variations are added for multiple existing columns,
862 /// or when the tags are auto-generated instead of being directly defined.
863 template <typename F>
864 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
865 const std::vector<std::string> &variationTags, std::string_view variationName = "")
866 {
867 std::vector<std::string> colNames{{std::string(colName)}};
868 const std::string theVariationName{variationName.empty() ? colName : variationName};
869
870 return VaryImpl<true>(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags,
871 theVariationName);
872 }
873
874 /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
875 /// \param[in] colName name of the column for which varied values are provided.
876 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
877 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
878 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
879 /// \param[in] inputColumns the names of the columns to be passed to the callable.
880 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
881 /// `"1"`, etc.
882 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
883 /// colName is used if none is provided.
884 ///
885 /// This overload of Vary takes an nVariations parameter instead of a list of tag names.
886 /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
887 /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
888 ///
889 /// Example usage:
890 /// ~~~{.cpp}
891 /// auto nominal_hx =
892 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, 2)
893 /// .Histo1D("x");
894 ///
895 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
896 /// hx["nominal"].Draw();
897 /// hx["pt:0"].Draw("SAME");
898 /// hx["pt:1"].Draw("SAME");
899 /// ~~~
900 ///
901 /// \sa This Vary() overload for more information.
902 template <typename F>
903 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
904 std::size_t nVariations, std::string_view variationName = "")
905 {
906 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
907
908 std::vector<std::string> variationTags;
909 variationTags.reserve(nVariations);
910 for (std::size_t i = 0u; i < nVariations; ++i)
911 variationTags.emplace_back(std::to_string(i));
912
913 const std::string theVariationName{variationName.empty() ? colName : variationName};
914
915 return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName);
916 }
917
918 /// \brief Register systematic variations for multiple existing columns using custom variation tags.
919 /// \param[in] colNames set of names of the columns for which varied values are provided.
920 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
921 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
922 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
923 /// \param[in] inputColumns the names of the columns to be passed to the callable.
924 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
925 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`
926 ///
927 /// This overload of Vary takes a list of column names as first argument and
928 /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each
929 /// affected column. The `variationTags` are defined as `{"down", "up"}`.
930 ///
931 /// Example usage:
932 /// ~~~{.cpp}
933 /// // produce variations "ptAndEta:down" and "ptAndEta:up"
934 /// auto nominal_hx =
935 /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
936 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
937 /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied
938 /// {"down", "up"}, // variation tags
939 /// "ptAndEta") // variation name
940 /// .Histo1D("pt", "eta");
941 ///
942 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
943 /// hx["nominal"].Draw();
944 /// hx["ptAndEta:down"].Draw("SAME");
945 /// hx["ptAndEta:up"].Draw("SAME");
946 /// ~~~
947 ///
948 /// \sa This Vary() overload for more information.
949
// Multi-column Vary with explicit tags: forwards every argument unchanged to VaryImpl<false>
// (the single-column overload in this file uses VaryImpl<true>).
// NOTE(review): the embedded numbering jumps from 950 to 952, so the return-type line of this
// declaration is missing from this listing; the sibling overloads visible here return
// RInterface<Proxied, DS_t> — confirm against the full file.
950 template <typename F>
952 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
953 const std::vector<std::string> &variationTags, std::string_view variationName)
954 {
955 return VaryImpl<false>(colNames, std::forward<F>(expression), inputColumns, variationTags, variationName);
956 }
957
958 /// \brief Register systematic variations for multiple existing columns using custom variation tags.
959 /// \param[in] colNames set of names of the columns for which varied values are provided.
960 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
961 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
962 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
963 /// \param[in] inputColumns the names of the columns to be passed to the callable.
964 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
965 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
966 /// colName is used if none is provided.
967 ///
968 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
969 /// is avoided.
970 ///
971 /// \sa This Vary() overload for more information.
// Initializer-list convenience overload: copies `colNames` into a std::vector<std::string> and
// calls Vary again with that vector and the remaining arguments unchanged.
// NOTE(review): the embedded numbering jumps from 972 to 974, so the return-type line of this
// declaration is missing from this listing; the sibling overloads visible here return
// RInterface<Proxied, DS_t> — confirm against the full file.
972 template <typename F>
974 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
975 const std::vector<std::string> &variationTags, std::string_view variationName)
976 {
977 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, variationTags, variationName);
978 }
979
980 /// \brief Register systematic variations for multiple existing columns using auto-generated tags.
981 /// \param[in] colNames set of names of the columns for which varied values are provided.
982 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
983 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
984 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
985 /// \param[in] inputColumns the names of the columns to be passed to the callable.
986 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
987 /// `"1"`, etc.
988 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
989 /// colName is used if none is provided.
990 ///
991 /// This overload of Vary takes a list of column names as first argument.
992 /// It takes an `nVariations` parameter instead of a list of tag names (`variationTags`). Tag names
993 /// will be auto-generated as the sequence 0...``nVariations-1``.
994 ///
995 /// Example usage:
996 /// ~~~{.cpp}
997 /// auto nominal_hx =
998 /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
999 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
1000 /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied
1001 /// 2, // auto-generated variation tags
1002 /// "ptAndEta") // variation name
1003 /// .Histo1D("pt", "eta");
1004 ///
1005 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1006 /// hx["nominal"].Draw();
1007 /// hx["ptAndEta:0"].Draw("SAME");
1008 /// hx["ptAndEta:1"].Draw("SAME");
1009 /// ~~~
1010 ///
1011 /// \sa This Vary() overload for more information.
// Multi-column Vary with auto-generated tags: asserts nVariations > 0, builds the tags
// "0", "1", ..., one per variation, and calls Vary again with those tags.
// NOTE(review): the embedded numbering jumps from 1012 to 1014, so the return-type line of this
// declaration is missing from this listing; the sibling overloads visible here return
// RInterface<Proxied, DS_t> — confirm against the full file.
1012 template <typename F>
1014 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
1015 std::size_t nVariations, std::string_view variationName)
1016 {
1017 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
1018
// Tags are the decimal indices of the variations, in order.
1019 std::vector<std::string> variationTags;
1020 variationTags.reserve(nVariations);
1021 for (std::size_t i = 0u; i < nVariations; ++i)
1022 variationTags.emplace_back(std::to_string(i));
1023
1024 return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName);
1025 }
1026
1027 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1028 /// \param[in] colNames set of names of the columns for which varied values are provided.
1029 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
1030 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
1031 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
1032 /// \param[in] inputColumns the names of the columns to be passed to the callable.
1034 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1035 /// `"1"`, etc.
1036 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1037 /// colName is used if none is provided.
1038 ///
1039 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
1040 /// is avoided.
1041 ///
1042 /// \sa This Vary() overload for more information.
// Initializer-list convenience overload: copies `colNames` into a std::vector<std::string> and
// calls Vary again with that vector, `nVariations` and the remaining arguments unchanged.
// NOTE(review): the embedded numbering jumps from 1043 to 1045, so the return-type line of this
// declaration is missing from this listing; the sibling overloads visible here return
// RInterface<Proxied, DS_t> — confirm against the full file.
1043 template <typename F>
1045 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
1046 std::size_t nVariations, std::string_view variationName)
1047 {
1048 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, nVariations, variationName);
1049 }
1050
1051 /// \brief Register systematic variations for a single existing column using custom variation tags.
1052 /// \param[in] colName name of the column for which varied values are provided.
1053 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1054 /// values for the specified column.
1055 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
1056 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1057 /// colName is used if none is provided.
1058 ///
1059 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1060 /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
1061 /// defined as `{"down", "up"}`.
1062 /// ~~~{.cpp}
1063 /// auto nominal_hx =
1064 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
1065 /// .Filter("pt > k")
1066 /// .Define("x", someFunc, {"pt"})
1067 /// .Histo1D("x");
1068 ///
1069 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1070 /// hx["nominal"].Draw();
1071 /// hx["pt:down"].Draw("SAME");
1072 /// hx["pt:up"].Draw("SAME");
1073 /// ~~~
1074 ///
1075 /// \sa This Vary() overload for more information.
1076 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression,
1077 const std::vector<std::string> &variationTags, std::string_view variationName = "")
1078 {
1079 std::vector<std::string> colNames{{std::string(colName)}};
1080 const std::string theVariationName{variationName.empty() ? colName : variationName};
1081
1082 return JittedVaryImpl(colNames, expression, variationTags, theVariationName, /*isSingleColumn=*/true);
1083 }
1084
1085 /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
1086 /// \param[in] colName name of the column for which varied values are provided.
1087 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1088 /// values for the specified column.
1089 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1090 /// `"1"`, etc.
1091 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1092 /// colName is used if none is provided.
1093 ///
1094 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1095 /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
1096 /// auto-generated.
1097 /// ~~~{.cpp}
1098 /// auto nominal_hx =
1099 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", 2)
1100 /// .Histo1D("pt");
1101 ///
1102 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1103 /// hx["nominal"].Draw();
1104 /// hx["pt:0"].Draw("SAME");
1105 /// hx["pt:1"].Draw("SAME");
1106 /// ~~~
1107 ///
1108 /// \sa This Vary() overload for more information.
1109 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
1110 std::string_view variationName = "")
1111 {
1112 std::vector<std::string> variationTags;
1113 variationTags.reserve(nVariations);
1114 for (std::size_t i = 0u; i < nVariations; ++i)
1115 variationTags.emplace_back(std::to_string(i));
1116
1117 return Vary(colName, expression, std::move(variationTags), variationName);
1118 }
1119
1120 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1121 /// \param[in] colNames set of names of the columns for which varied values are provided.
1122 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
1123 /// values for the specified columns.
1124 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1125 /// `"1"`, etc.
1126 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1127 ///
1128 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1129 /// compiled. It takes an nVariations parameter instead of a list of tag names.
1130 /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
1131 /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
1132 /// The example below shows how Vary() is used while dealing with multiple columns.
1133 ///
1134 /// ~~~{.cpp}
1135 /// auto nominal_hx =
1136 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
1137 /// .Histo1D("x", "y");
1138 ///
1139 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1140 /// hx["nominal"].Draw();
1141 /// hx["xy:0"].Draw("SAME");
1142 /// hx["xy:1"].Draw("SAME");
1143 /// ~~~
1144 ///
1145 /// \sa This Vary() overload for more information.
1146 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
1147 std::size_t nVariations, std::string_view variationName)
1148 {
1149 std::vector<std::string> variationTags;
1150 variationTags.reserve(nVariations);
1151 for (std::size_t i = 0u; i < nVariations; ++i)
1152 variationTags.emplace_back(std::to_string(i));
1153
1154 return Vary(colNames, expression, std::move(variationTags), variationName);
1155 }
1156
1157 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1158 /// \param[in] colNames set of names of the columns for which varied values are provided.
1159 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1160 /// values for the specified column.
1161 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1162 /// `"1"`, etc.
1163 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1164 /// colName is used if none is provided.
1165 ///
1166 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
1167 /// is avoided.
1168 ///
1169 /// \sa This Vary() overload for more information.
1170 RInterface<Proxied, DS_t> Vary(std::initializer_list<std::string> colNames, std::string_view expression,
1171 std::size_t nVariations, std::string_view variationName)
1172 {
1173 return Vary(std::vector<std::string>(colNames), expression, nVariations, variationName);
1174 }
1175
1176 /// \brief Register systematic variations for multiple existing columns using custom variation tags.
1177 /// \param[in] colNames set of names of the columns for which varied values are provided.
1178 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
1179 /// values for the specified columns.
1180 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
1181 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1182 ///
1183 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1184 /// compiled. The example below shows how Vary() is used while dealing with multiple columns. The tags are defined as
1185 /// `{"down", "up"}`.
1186 /// ~~~{.cpp}
1187 /// auto nominal_hx =
1188 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
1189 /// .Histo1D("x", "y");
1190 ///
1191 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1192 /// hx["nominal"].Draw();
1193 /// hx["xy:down"].Draw("SAME");
1194 /// hx["xy:up"].Draw("SAME");
1195 /// ~~~
1196 ///
1197 /// \sa This Vary() overload for more information.
1198 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
1199 const std::vector<std::string> &variationTags, std::string_view variationName)
1200 {
1201 return JittedVaryImpl(colNames, expression, variationTags, variationName, /*isSingleColumn=*/false);
1202 }
1203
1204 ////////////////////////////////////////////////////////////////////////////
1205 /// \brief Allow to refer to a column with a different name.
1206 /// \param[in] alias name of the column alias
1207 /// \param[in] columnName name of the column to be aliased
1208 /// \return the first node of the computation graph for which the alias is available.
1209 ///
1210 /// Aliasing an alias is supported.
1211 ///
1212 /// ### Example usage:
1213 /// ~~~{.cpp}
1214 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
1215 /// ~~~
// Checks that `alias` does not redefine an existing name, validates `columnName`, records the
// alias in `newCols` and returns a new RInterface built from the same fProxiedPtr and loop manager.
// NOTE(review): the embedded numbering of this listing jumps over 1226 and 1232, so at least
// the declaration of `newCols` is not shown — confirm against the full file.
1216 RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName)
1217 {
1218 // The symmetry with Define is clear. We want to:
1219 // - Create globally the alias and return this very node, unchanged
1220 // - Make aliases accessible based on chains and not globally
1221
1222 // Column names of the data source, or an empty list when there is no data source;
// used below to find out if the alias clashes with an existing column.
1223 auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1224
1225 constexpr auto where = "Alias";
1227 // If the alias name is a column name, there is a problem
1228 RDFInternal::CheckForRedefinition(where, alias, fColRegister, fLoopManager->GetBranchNames(), dsColumnNames);
1229
// Validate the single target column name and keep the (only) validated result.
1230 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
1231
// NOTE(review): `newCols` is declared on a line missing from this listing.
1233 newCols.AddAlias(alias, validColumnName);
1234
1235 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
1236
1237 return newInterface;
1238 }
1239
1240 ////////////////////////////////////////////////////////////////////////////
1241 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1242 /// \tparam ColumnTypes variadic list of branch/column types.
1243 /// \param[in] treename The name of the output TTree.
1244 /// \param[in] filename The name of the output TFile.
1245 /// \param[in] columnList The list of names of the columns/branches to be written.
1246 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1247 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1248 ///
1249 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
1250 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
1251 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
1252 /// written out and it appears before the array in the columnList.
1253 ///
1254 /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other
1255 /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are
1256 /// present, by default all friend top-level branches that have names that do not collide with
1257 /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out
1258 /// friend branches with the same names of branches in the main TTree/TChain with names of the form
1259 /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain.
1260 ///
1261 /// ### Writing to a sub-directory
1262 ///
1263 /// Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to
1264 /// the TTree as part of the TTree name, e.g. `df.Snapshot("subdir/t", "f.root")` writes TTree `t` in the
1265 /// sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
1266 ///
1267 /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
1268 /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled with
1269 /// respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in wrong
1270 /// associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
1271 /// error out if such a "shuffled" TTree is used in a friendship.
1272 ///
1273 /// \note In case no events are written out (e.g. because no event passes all filters) the behavior of Snapshot in
1274 /// single-thread and multi-thread runs is different: in single-thread runs, Snapshot will write out a TTree with
1275 /// the specified name and zero entries; in multi-thread runs, no TTree object will be written out to disk.
1276 ///
1277 /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
1278 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1279 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1280 /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
1281 ///
1282 /// ### Example invocations:
1283 ///
1284 /// ~~~{.cpp}
1285 /// // without specifying template parameters (column types automatically deduced)
1286 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
1287 ///
1288 /// // specifying template parameters ("x" is `int`, "y" is `float`)
1289 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
1290 /// ~~~
1291 ///
1292 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
1293 /// `RSnapshotOptions`:
1294 /// ~~~{.cpp}
1295 /// RSnapshotOptions opts;
1296 /// opts.fLazy = true;
1297 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
1298 /// ~~~
// Typed Snapshot overload: the column types are spelled out as template arguments.
// NOTE(review): the line carrying the return type of this declaration (listing line 1300) is not visible here.
1299 template <typename... ColumnTypes>
1301 Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
1302 const RSnapshotOptions &options = RSnapshotOptions())
1303 {
// Forward every argument unchanged to SnapshotImpl, instantiated with the explicit column types.
1304 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
1305 }
1306
1307 ////////////////////////////////////////////////////////////////////////////
1308 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1309 /// \param[in] treename The name of the output TTree.
1310 /// \param[in] filename The name of the output TFile.
1311 /// \param[in] columnList The list of names of the columns/branches to be written.
1312 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1313 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1314 ///
1315 /// This function returns a `RDataFrame` built with the output tree as a source.
1316 /// The types of the columns are automatically inferred and do not need to be specified.
1317 ///
1318 /// See above for a more complete description and example usages.
1319 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1320 const ColumnNames_t &columnList,
1321 const RSnapshotOptions &options = RSnapshotOptions())
1322 {
1323 // like columnList but with `#var` columns removed
1324 auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
1325 // like colListNoPoundSizes but with aliases resolved
1326 auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes);
1328 // like colListNoAliases but with missing size branches required by array branches added in the right positions
// Both lists are handed over (moved) to AddSizeBranches and must not be used again below; the
// returned pair is read as (alias-free list, list with aliases), judging by the names bound to it.
1329 const auto pairOfColumnLists =
1330 RDFInternal::AddSizeBranches(fLoopManager->GetBranchNames(), fLoopManager->GetTree(),
1331 std::move(colListNoAliases), std::move(colListNoPoundSizes));
1332 const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first;
1333 const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second;
1334
1335
// Keep the name exactly as passed by the caller (it may contain a directory part, e.g. "subdir/t")
// for CreateLMFromTTree below, then split it into directory name and bare tree name.
1336 const auto fullTreeName = treename;
1337 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
1338 treename = parsedTreePath.fTreeName;
1339 const auto &dirname = parsedTreePath.fDirName;
1340
// Arguments for the snapshot helper: output file, directory and tree names, the column names with
// aliases, and the user-provided options.
1341 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(
1342 RDFInternal::SnapshotHelperArgs{std::string(filename), std::string(dirname), std::string(treename),
1343 colListWithAliasesAndSizeBranches, options});
1344
1346
1347 // The CreateLMFromTTree function by default opens the file passed as input
1348 // to check for the presence of the TTree inside. But at this moment the
1349 // filename we are using here corresponds to a file which does not exist yet,
1350 // i.e. the output file of the Snapshot call. Thus, checkFile=false will
1351 // prevent the function from trying to open a non-existent file.
1352 auto newRDF = std::make_shared<RInterface<RLoopManager>>(ROOT::Detail::RDF::CreateLMFromTTree(
1353 fullTreeName, filename, colListNoAliasesWithSizeBranches, /*checkFile*/ false));
1354
// Book the Snapshot action with inferred column types; newRDF (the dataframe built on the output
// tree) is what the returned result pointer wraps.
1355 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
1356 colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr, colListNoAliasesWithSizeBranches.size(),
1357 options.fVector2RVec);
1358
// Unless a lazy Snapshot was requested through the options, dereference the result right away:
// per the Snapshot documentation this is what triggers the event loop.
1359 if (!options.fLazy)
1360 *resPtr;
1361 return resPtr;
1362 }
1363
1364 // clang-format off
1365 ////////////////////////////////////////////////////////////////////////////
1366 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1367 /// \param[in] treename The name of the output TTree.
1368 /// \param[in] filename The name of the output TFile.
1369 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1370 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
1371 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1372 ///
1373 /// This function returns a `RDataFrame` built with the output tree as a source.
1374 /// The types of the columns are automatically inferred and do not need to be specified.
1375 ///
1376 /// See above for a more complete description and example usages.
1377 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1378 std::string_view columnNameRegexp = "",
1379 const RSnapshotOptions &options = RSnapshotOptions())
1380 {
1381 const auto definedColumns = fColRegister.GenerateColumnNames();
1382 auto *tree = fLoopManager->GetTree();
1383 const auto treeBranchNames = tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1384 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1385 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1386 ColumnNames_t dsColumnsWithoutSizeColumns;
1387 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1388 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1389 ColumnNames_t columnNames;
1390 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
1391 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1392 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1393 columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
1394
1395 // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd.
1396 // RemoveDuplicates should preserve ordering of the columns: it might be meaningful.
1397 RDFInternal::RemoveDuplicates(columnNames);
1398
1399 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
1400 return Snapshot(treename, filename, selectedColumns, options);
1401 }
1402 // clang-format on
1403
1404 // clang-format off
1405 ////////////////////////////////////////////////////////////////////////////
1406 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1407 /// \param[in] treename The name of the output TTree.
1408 /// \param[in] filename The name of the output TFile.
1409 /// \param[in] columnList The list of names of the columns/branches to be written.
1410 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1411 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1412 ///
1413 /// This function returns a `RDataFrame` built with the output tree as a source.
1414 /// The types of the columns are automatically inferred and do not need to be specified.
1415 ///
1416 /// See above for a more complete description and example usages.
1417 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1418 std::initializer_list<std::string> columnList,
1419 const RSnapshotOptions &options = RSnapshotOptions())
1420 {
1421 ColumnNames_t selectedColumns(columnList);
1422 return Snapshot(treename, filename, selectedColumns, options);
1423 }
1424 // clang-format on
1425
1426 ////////////////////////////////////////////////////////////////////////////
1427 /// \brief Save selected columns in memory.
1428 /// \tparam ColumnTypes variadic list of branch/column types.
1429 /// \param[in] columnList columns to be cached in memory.
1430 /// \return a `RDataFrame` that wraps the cached dataset.
1431 ///
1432 /// This action returns a new `RDataFrame` object, completely detached from
1433 /// the originating `RDataFrame`. The new dataframe only contains the cached
1434 /// columns and stores their content in memory for fast, zero-copy subsequent access.
1435 ///
1436 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
1437 /// fits in memory and that will be accessed many times.
1438 ///
1439 /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
1440 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1441 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1442 /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
1443 ///
1444 /// ### Example usage:
1445 ///
1446 /// **Types and columns specified:**
1447 /// ~~~{.cpp}
1448 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
1449 /// ~~~
1450 ///
1451 /// **Types inferred and columns specified (this invocation relies on jitting):**
1452 /// ~~~{.cpp}
1453 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
1454 /// ~~~
1455 ///
1456 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
1457 /// ~~~{.cpp}
1458 /// auto cache_all_cols_df = df.Cache(myRegexp);
1459 /// ~~~
// Typed Cache overload: the column types are spelled out as template arguments.
// NOTE(review): the line with the return type, name and `columnList` parameter of this declaration
// (listing line 1461) is not visible here.
1460 template <typename... ColumnTypes>
1462 {
// One compile-time index per explicitly specified column type.
1463 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
// Delegate to CacheImpl, passing the column names and the index sequence.
1464 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
1465 }
1466
1467 ////////////////////////////////////////////////////////////////////////////
1468 /// \brief Save selected columns in memory.
1469 /// \param[in] columnList columns to be cached in memory
1470 /// \return a `RDataFrame` that wraps the cached dataset.
1471 ///
1472 /// See the previous overloads for more information.
1474 {
// NOTE(review): the signature line of this overload (listing line 1473) is not visible here; the body
// reads a `columnList` parameter and returns an RInterface<RLoopManager>.
1475 // Early return: if the list of columns is empty, just return an empty RDF
1476 // If we proceed, the jitted call will not compile!
1477 if (columnList.empty()) {
// Dereferencing the Count() result yields the number of entries; the empty dataframe is built
// on a loop manager constructed from that number.
1478 auto nEntries = *this->Count();
1479 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
1480 return emptyRDF;
1481 }
1482
1483 std::stringstream cacheCall;
// Interface over the upcast node type: its address is embedded in the jitted code below.
1484 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
1485 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
1486 fColRegister);
1487 // build a string equivalent to
1488 // "*(RInterface<RLoopManager>*)(<addr>) = ((RInterface<RNodeBase>*)(&upcastInterface))->Cache<Ts...>(*(std::vector<std::string>*)(&columnListWithoutSizeColumns));"
// NOTE(review): the operand streamed between the first two string literals (listing line 1491) is not
// visible here; presumably it is the address of resRDF, which is the object returned below -- confirm.
1489 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
1490 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
1492 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1493 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
1494
1495 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
1496
1497 const auto validColumnNames =
1498 GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
// The type names of the validated columns become the template arguments of the jitted Cache<...> call.
1499 const auto colTypes = GetValidatedArgTypes(validColumnNames, fColRegister, fLoopManager->GetTree(), fDataSource,
1500 "Cache", /*vector2RVec=*/false);
1501 for (const auto &colType : colTypes)
1502 cacheCall << colType << ", ";
1503 if (!columnListWithoutSizeColumns.empty())
1504 cacheCall.seekp(-2, cacheCall.cur); // step back over the trailing ", " so that it gets overwritten
1505 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
1506 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
1507
1508 // book the code to jit with the RLoopManager and trigger the event loop
// The jitted code refers to locals of this function by address, so it has to run before returning.
1509 fLoopManager->ToJitExec(cacheCall.str());
1510 fLoopManager->Jit();
1511
1512 return resRDF;
1513 }
1514
1515 ////////////////////////////////////////////////////////////////////////////
1516 /// \brief Save selected columns in memory.
1517 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1518 /// \return a `RDataFrame` that wraps the cached dataset.
1519 ///
1520 /// The existing columns are matched against the regular expression. If the string provided
1521 /// is empty, all columns are selected. See the previous overloads for more information.
1522 RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "")
1523 {
1524 const auto definedColumns = fColRegister.GenerateColumnNames();
1525 auto *tree = fLoopManager->GetTree();
1526 const auto treeBranchNames =
1528 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1529 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1530 ColumnNames_t dsColumnsWithoutSizeColumns;
1531 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1532 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1533 ColumnNames_t columnNames;
1534 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
1535 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1536 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1537 columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
1538 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
1539 return Cache(selectedColumns);
1540 }
1541
1542 ////////////////////////////////////////////////////////////////////////////
1543 /// \brief Save selected columns in memory.
1544 /// \param[in] columnList columns to be cached in memory.
1545 /// \return a `RDataFrame` that wraps the cached dataset.
1546 ///
1547 /// See the previous overloads for more information.
1548 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
1549 {
1550 ColumnNames_t selectedColumns(columnList);
1551 return Cache(selectedColumns);
1552 }
1553
1554 // clang-format off
1555 ////////////////////////////////////////////////////////////////////////////
1556 /// \brief Creates a node that filters entries based on range: [begin, end).
1557 /// \param[in] begin Initial entry number considered for this range.
1558 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1559 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
1560 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
1561 ///
1562 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
1563 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
1564 ///
1565 /// ### Example usage:
1566 /// ~~~{.cpp}
1567 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
1568 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
1569 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
1570 /// ~~~
1571 // clang-format on
1572 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
1573 {
1574 // check invariants
1575 if (stride == 0 || (end != 0 && end < begin))
1576 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
1577 CheckIMTDisabled("Range");
1578
1579 using Range_t = RDFDetail::RRange<Proxied>;
1580 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
1581 RInterface<RDFDetail::RRange<Proxied>, DS_t> newInterface(std::move(rangePtr), *fLoopManager, fColRegister);
1582 return newInterface;
1583 }
1584
1585 // clang-format off
1586 ////////////////////////////////////////////////////////////////////////////
1587 /// \brief Creates a node that filters entries based on range.
1588 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1589 /// \return a node of the computation graph for which the range is defined.
1590 ///
1591 /// See the other Range overload for a detailed description.
1592 // clang-format on
1593 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
1594
1595 // clang-format off
1596 ////////////////////////////////////////////////////////////////////////////
1597 /// \brief Execute a user-defined function on each entry (*instant action*).
1598 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1599 /// \param[in] columns Names of the columns/branches in input to the user function.
1600 ///
1601 /// The callable `f` is invoked once per entry. This is an *instant action*:
1602 /// upon invocation, an event loop as well as execution of all scheduled actions
1603 /// is triggered.
1604 /// Users are responsible for the thread-safety of this callable when executing
1605 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
1606 ///
1607 /// ### Example usage:
1608 /// ~~~{.cpp}
1609 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
1610 /// ~~~
1611 // clang-format on
1612 template <typename F>
1613 void Foreach(F f, const ColumnNames_t &columns = {})
1614 {
1615 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
1616 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
1617 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
1618 }
1619
1620 // clang-format off
1621 ////////////////////////////////////////////////////////////////////////////
1622 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
1623 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1624 /// \param[in] columns Names of the columns/branches in input to the user function.
1625 ///
1626 /// Same as `Foreach`, but the user-defined function takes an extra
1627 /// `unsigned int` as its first parameter, the *processing slot index*.
1628 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
1629 /// for each thread of execution.
1630 /// This is meant as a helper in writing thread-safe `Foreach`
1631 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1632 /// The user-defined processing callable is able to follow different
1633 /// *streams of processing* indexed by the first parameter.
1634 /// `ForeachSlot` works just as well with single-thread execution: in that
1635 /// case `slot` will always be `0`.
1636 ///
1637 /// ### Example usage:
1638 /// ~~~{.cpp}
1639 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1640 /// ~~~
1641 // clang-format on
1642 template <typename F>
1643 void ForeachSlot(F f, const ColumnNames_t &columns = {})
1644 {
// NOTE(review): the declarations of `ColTypes_t` (listing line 1645) and `Action_t` (listing line 1652)
// are not visible in this listing.
// Number of columns the callable reads, taken from the size of the ColTypes_t type list.
1646 constexpr auto nColumns = ColTypes_t::list_size;
1647
// Validate the requested column names, then run the data-source check/fill step for them.
1648 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1649 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
1650
1651 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1653
// The action takes ownership of the callable (moved into the helper) and is kept alive by this
// local until the function returns, i.e. for the whole duration of the event loop below.
1654 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister);
1655
// Instant action: run the event loop right away.
1656 fLoopManager->Run();
1657 }
1658
1659 // clang-format off
1660 ////////////////////////////////////////////////////////////////////////////
1661 /// \brief Execute a user-defined reduce operation on the values of a column.
1662 /// \tparam F The type of the reduce callable. Automatically deduced.
1663 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1664 /// \param[in] f A callable with signature `T(T,T)`
1665 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1666 /// \return the reduced quantity wrapped in a ROOT::RDF::RResultPtr.
1667 ///
1668 /// A reduction takes two values of a column and merges them into one (e.g.
1669 /// by summing them, taking the maximum, etc). This action performs the
1670 /// specified reduction operation on all processed column values, returning
1671 /// a single value of the same type. The callable f must satisfy the general
1672 /// requirements of a *processing function* besides having signature `T(T,T)`
1673 /// where `T` is the type of column columnName.
1674 ///
1675 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1676 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1677 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1678 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1679 /// overload.
1680 ///
1681 /// ### Example usage:
1682 /// ~~~{.cpp}
1683 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1684 /// ~~~
1685 ///
1686 /// This action is *lazy*: upon invocation of this method the calculation is
1687 /// booked but not executed. Also see RResultPtr.
1688 // clang-format on
1689 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1690 RResultPtr<T> Reduce(F f, std::string_view columnName = "")
1691 {
1692 static_assert(
1693 std::is_default_constructible<T>::value,
1694 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
1695 return Reduce(std::move(f), columnName, T());
1696 }
1697
1698 ////////////////////////////////////////////////////////////////////////////
1699 /// \brief Execute a user-defined reduce operation on the values of a column.
1700 /// \tparam F The type of the reduce callable. Automatically deduced.
1701 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1702 /// \param[in] f A callable with signature `T(T,T)`
1703 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1704 /// \param[in] redIdentity The reduced object of each thread is initialized to this value.
1705 /// \return the reduced quantity wrapped in a RResultPtr.
1706 ///
1707 /// ### Example usage:
1708 /// ~~~{.cpp}
1709 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1710 /// ~~~
1711 /// See the description of the first Reduce overload for more information.
1712 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1713 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1714 {
1715 return Aggregate(f, f, columnName, redIdentity);
1716 }
1717
1718 ////////////////////////////////////////////////////////////////////////////
1719 /// \brief Return the number of entries processed (*lazy action*).
1720 /// \return the number of entries wrapped in a RResultPtr.
1721 ///
1722 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1723 /// This action is *lazy*: upon invocation of this method the calculation is
1724 /// booked but not executed. Also see RResultPtr.
1725 ///
1726 /// ### Example usage:
1727 /// ~~~{.cpp}
1728 /// auto nEntriesAfterCuts = myFilteredDf.Count();
1729 /// ~~~
1730 ///
1732 {
// NOTE(review): three lines of this method are not visible in this listing: the signature
// (listing line 1731), the declaration of `Action_t` (1736) and the final argument(s) of the
// make_unique call below (1738).
1733 const auto nSlots = fLoopManager->GetNSlots();
// Counter starting at 0, shared between the helper and the result pointer returned below.
1734 auto cSPtr = std::make_shared<ULong64_t>(0);
1735 using Helper_t = RDFInternal::CountHelper;
// Counting reads no columns, hence the empty column list.
1737 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
// Hand the action over to the result pointer; nothing is executed here (lazy action).
1739 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1740 }
1741
1742 ////////////////////////////////////////////////////////////////////////////
1743 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1744 /// \tparam T The type of the column.
1745 /// \tparam COLL The type of collection used to store the values.
1746 /// \param[in] column The name of the column to collect the values of.
1747 /// \return the content of the selected column wrapped in a RResultPtr.
1748 ///
1749 /// The collection type to be specified for C-style array columns is `RVec<T>`:
1750 /// in this case the returned collection is a `std::vector<RVec<T>>`.
1751 /// ### Example usage:
1752 /// ~~~{.cpp}
1753 /// // In this case intCol is a std::vector<int>
1754 /// auto intCol = rdf.Take<int>("integerColumn");
1755 /// // Same content as above but in this case taken as a RVec<int>
1756 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1757 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1758 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1759 /// ~~~
1760 /// This action is *lazy*: upon invocation of this method the calculation is
1761 /// booked but not executed. Also see RResultPtr.
1762 template <typename T, typename COLL = std::vector<T>>
1763 RResultPtr<COLL> Take(std::string_view column = "")
1764 {
// An empty `column` yields an empty request list; exactly one column is asked for either way.
1765 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1766
1767 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1768 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1769
1770 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
// NOTE(review): the declaration of `Action_t` (listing line 1771) is not visible in this listing.
// Output collection, shared between the helper and the result pointer returned below.
1772 auto valuesPtr = std::make_shared<COLL>();
1773 const auto nSlots = fLoopManager->GetNSlots();
1774
1775 auto action =
1776 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister);
// Hand the action over to the result pointer; nothing is executed here (lazy action).
1777 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1778 }
1779
1780 ////////////////////////////////////////////////////////////////////////////
1781 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1782 /// \tparam V The type of the column used to fill the histogram.
1783 /// \param[in] model The returned histogram will be constructed using this as a model.
1784 /// \param[in] vName The name of the column that will fill the histogram.
1785 /// \return the monodimensional histogram wrapped in a RResultPtr.
1786 ///
1787 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1788 /// is filled with each one of the elements of the container. In case multiple columns of container type
1789 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1790 /// possibly different lengths between events).
1791 /// This action is *lazy*: upon invocation of this method the calculation is
1792 /// booked but not executed. Also see RResultPtr.
1793 ///
1794 /// ### Example usage:
1795 /// ~~~{.cpp}
1796 /// // Deduce column type (this invocation needs jitting internally)
1797 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1798 /// // Explicit column type
1799 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1800 /// ~~~
1801 ///
1802 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1803 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1804 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1805 template <typename V = RDFDetail::RInferredType>
1806 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1807 {
1808 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1809
1810 const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1811
1812 std::shared_ptr<::TH1D> h(nullptr);
1813 {
1814 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1815 h = model.GetHistogram();
1816 h->SetDirectory(nullptr);
1817 }
1818
1819 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1820 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
1821 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h, fProxiedPtr);
1822 }
1823
1824 ////////////////////////////////////////////////////////////////////////////
1825 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1826 /// \tparam V The type of the column used to fill the histogram.
1827 /// \param[in] vName The name of the column that will fill the histogram.
1828 /// \return the monodimensional histogram wrapped in a RResultPtr.
1829 ///
1830 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1831 /// The "name" and "title" strings are built starting from the input column name.
1832 /// See the description of the first Histo1D() overload for more details.
1833 ///
1834 /// ### Example usage:
1835 /// ~~~{.cpp}
1836 /// // Deduce column type (this invocation needs jitting internally)
1837 /// auto myHist1 = myDf.Histo1D("myColumn");
1838 /// // Explicit column type
1839 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1840 /// ~~~
1841 template <typename V = RDFDetail::RInferredType>
1842 RResultPtr<::TH1D> Histo1D(std::string_view vName)
1843 {
1844 const auto h_name = std::string(vName);
1845 const auto h_title = h_name + ";" + h_name + ";count";
1846 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1847 }
1848
1849 ////////////////////////////////////////////////////////////////////////////
1850 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1851 /// \tparam V The type of the column used to fill the histogram.
1852 /// \tparam W The type of the column used as weights.
1853 /// \param[in] model The returned histogram will be constructed using this as a model.
1854 /// \param[in] vName The name of the column that will fill the histogram.
1855 /// \param[in] wName The name of the column that will provide the weights.
1856 /// \return the monodimensional histogram wrapped in a RResultPtr.
1857 ///
1858 /// See the description of the first Histo1D() overload for more details.
1859 ///
1860 /// ### Example usage:
1861 /// ~~~{.cpp}
1862 /// // Deduce column type (this invocation needs jitting internally)
1863 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1864 /// // Explicit column type
1865 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1866 /// ~~~
1867 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1868 RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
1869 {
1870 const std::vector<std::string_view> columnViews = {vName, wName};
1871 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1872 ? ColumnNames_t()
1873 : ColumnNames_t(columnViews.begin(), columnViews.end());
1874 std::shared_ptr<::TH1D> h(nullptr);
1875 {
1876 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1877 h = model.GetHistogram();
1878 }
1879 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h, fProxiedPtr);
1880 }
1881
1882 ////////////////////////////////////////////////////////////////////////////
1883 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1884 /// \tparam V The type of the column used to fill the histogram.
1885 /// \tparam W The type of the column used as weights.
1886 /// \param[in] vName The name of the column that will fill the histogram.
1887 /// \param[in] wName The name of the column that will provide the weights.
1888 /// \return the monodimensional histogram wrapped in a RResultPtr.
1889 ///
1890 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1891 /// The "name" and "title" strings are built starting from the input column names.
1892 /// See the description of the first Histo1D() overload for more details.
1893 ///
1894 /// ### Example usage:
1895 /// ~~~{.cpp}
1896 /// // Deduce column types (this invocation needs jitting internally)
1897 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1898 /// // Explicit column types
1899 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1900 /// ~~~
1901 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1902 RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName)
1903 {
1904 // We build name and title based on the value and weight column names
1905 std::string str_vName{vName};
1906 std::string str_wName{wName};
1907 const auto h_name = str_vName + "_weighted_" + str_wName;
1908 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1909 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1910 }
1911
1912 ////////////////////////////////////////////////////////////////////////////
1913 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1914 /// \tparam V The type of the column used to fill the histogram.
1915 /// \tparam W The type of the column used as weights.
1916 /// \param[in] model The returned histogram will be constructed using this as a model.
1917 /// \return the monodimensional histogram wrapped in a RResultPtr.
1918 ///
1919 /// This overload will use the first two default columns as column names.
1920 /// See the description of the first Histo1D() overload for more details.
1921 template <typename V, typename W>
1922 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1923 {
1924 return Histo1D<V, W>(model, "", "");
1925 }
1926
1927 ////////////////////////////////////////////////////////////////////////////
1928 /// \brief Fill and return a two-dimensional histogram (*lazy action*).
1929 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1930 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1931 /// \param[in] model The returned histogram will be constructed using this as a model.
1932 /// \param[in] v1Name The name of the column that will fill the x axis.
1933 /// \param[in] v2Name The name of the column that will fill the y axis.
1934 /// \return the bidimensional histogram wrapped in a RResultPtr.
1935 ///
1936 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1937 /// is filled with each one of the elements of the container. In case multiple columns of container type
1938 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1939 /// possibly different lengths between events).
1940 /// This action is *lazy*: upon invocation of this method the calculation is
1941 /// booked but not executed. Also see RResultPtr.
1942 ///
1943 /// ### Example usage:
1944 /// ~~~{.cpp}
1945 /// // Deduce column types (this invocation needs jitting internally)
1946 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1947 /// // Explicit column types
1948 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1949 /// ~~~
1950 ///
1951 ///
1952 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1953 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1954 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1955 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1956 RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1957 {
1958 std::shared_ptr<::TH2D> h(nullptr);
1959 {
1960 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1961 h = model.GetHistogram();
1962 }
1963 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1964 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1965 }
1966 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1967 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1968 ? ColumnNames_t()
1969 : ColumnNames_t(columnViews.begin(), columnViews.end());
1970 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h, fProxiedPtr);
1971 }
1972
1973 ////////////////////////////////////////////////////////////////////////////
1974 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
1975 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1976 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1977 /// \tparam W The type of the column used for the weights of the histogram.
1978 /// \param[in] model The returned histogram will be constructed using this as a model.
1979 /// \param[in] v1Name The name of the column that will fill the x axis.
1980 /// \param[in] v2Name The name of the column that will fill the y axis.
1981 /// \param[in] wName The name of the column that will provide the weights.
1982 /// \return the bidimensional histogram wrapped in a RResultPtr.
1983 ///
1984 /// This action is *lazy*: upon invocation of this method the calculation is
1985 /// booked but not executed. Also see RResultPtr.
1986 ///
1987 /// ### Example usage:
1988 /// ~~~{.cpp}
1989 /// // Deduce column types (this invocation needs jitting internally)
1990 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1991 /// // Explicit column types
1992 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1993 /// ~~~
1994 ///
1995 /// See the documentation of the first Histo2D() overload for more details.
1996 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1997 typename W = RDFDetail::RInferredType>
1999 Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2000 {
2001 std::shared_ptr<::TH2D> h(nullptr);
2002 {
2003 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2004 h = model.GetHistogram();
2005 }
2006 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
2007 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
2008 }
2009 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2010 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2011 ? ColumnNames_t()
2012 : ColumnNames_t(columnViews.begin(), columnViews.end());
2013 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
2014 }
2015
2016 template <typename V1, typename V2, typename W>
2018 {
2019 return Histo2D<V1, V2, W>(model, "", "", "");
2020 }
2021
2022 ////////////////////////////////////////////////////////////////////////////
2023 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
2024 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2025 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2026 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2027 /// \param[in] model The returned histogram will be constructed using this as a model.
2028 /// \param[in] v1Name The name of the column that will fill the x axis.
2029 /// \param[in] v2Name The name of the column that will fill the y axis.
2030 /// \param[in] v3Name The name of the column that will fill the z axis.
2031 /// \return the tridimensional histogram wrapped in a RResultPtr.
2032 ///
2033 /// This action is *lazy*: upon invocation of this method the calculation is
2034 /// booked but not executed. Also see RResultPtr.
2035 ///
2036 /// ### Example usage:
2037 /// ~~~{.cpp}
2038 /// // Deduce column types (this invocation needs jitting internally)
2039 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2040 /// "myValueX", "myValueY", "myValueZ");
2041 /// // Explicit column types
2042 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2043 /// "myValueX", "myValueY", "myValueZ");
2044 /// ~~~
2045 ///
2046 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
2047 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2048 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2049 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2050 typename V3 = RDFDetail::RInferredType>
2051 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "",
2052 std::string_view v3Name = "")
2053 {
2054 std::shared_ptr<::TH3D> h(nullptr);
2055 {
2056 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2057 h = model.GetHistogram();
2058 }
2059 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
2060 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
2061 }
2062 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2063 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2064 ? ColumnNames_t()
2065 : ColumnNames_t(columnViews.begin(), columnViews.end());
2066 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
2067 }
2068
2069 ////////////////////////////////////////////////////////////////////////////
2070 /// \brief Fill and return a weighted three-dimensional histogram (*lazy action*).
2071 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2072 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2073 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2074 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2075 /// \param[in] model The returned histogram will be constructed using this as a model.
2076 /// \param[in] v1Name The name of the column that will fill the x axis.
2077 /// \param[in] v2Name The name of the column that will fill the y axis.
2078 /// \param[in] v3Name The name of the column that will fill the z axis.
2079 /// \param[in] wName The name of the column that will provide the weights.
2080 /// \return the tridimensional histogram wrapped in a RResultPtr.
2081 ///
2082 /// This action is *lazy*: upon invocation of this method the calculation is
2083 /// booked but not executed. Also see RResultPtr.
2084 ///
2085 /// ### Example usage:
2086 /// ~~~{.cpp}
2087 /// // Deduce column types (this invocation needs jitting internally)
2088 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2089 /// "myValueX", "myValueY", "myValueZ", "myWeight");
2090 /// // Explicit column types
2091 /// using d_t = double;
2092 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2093 /// "myValueX", "myValueY", "myValueZ", "myWeight");
2094 /// ~~~
2095 ///
2096 ///
2097 /// See the documentation of the first Histo3D() overload for more details.
2098 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2099 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2100 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name,
2101 std::string_view v3Name, std::string_view wName)
2102 {
2103 std::shared_ptr<::TH3D> h(nullptr);
2104 {
2105 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2106 h = model.GetHistogram();
2107 }
2108 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
2109 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
2110 }
2111 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2112 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2113 ? ColumnNames_t()
2114 : ColumnNames_t(columnViews.begin(), columnViews.end());
2115 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
2116 }
2117
2118 template <typename V1, typename V2, typename V3, typename W>
2120 {
2121 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
2122 }
2123
2124 ////////////////////////////////////////////////////////////////////////////
2125 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
2126 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
2127 /// present.
2128 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
2129 /// object.
2130 /// \param[in] model The returned histogram will be constructed using this as a model.
2131 /// \param[in] columnList
2132 /// A list containing the names of the columns that will be passed when calling `Fill`.
2133 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
2134 /// \return the N-dimensional histogram wrapped in a RResultPtr.
2135 ///
2136 /// This action is *lazy*: upon invocation of this method the calculation is
2137 /// booked but not executed. See RResultPtr documentation.
2138 ///
2139 /// ### Example usage:
2140 /// ~~~{.cpp}
2141 /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
2142 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2143 /// {"col0", "col1", "col2", "col3"});
2144 /// ~~~
2145 ///
2146 template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
2147 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
2148 {
2149 std::shared_ptr<::THnD> h(nullptr);
2150 {
2151 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2152 h = model.GetHistogram();
2153
2154 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2155 h->Sumw2();
2156 } else if (int(columnList.size()) != h->GetNdimensions()) {
2157 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2158 }
2159 }
2160 return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h,
2161 fProxiedPtr);
2162 }
2163
2164 ////////////////////////////////////////////////////////////////////////////
2165 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
2166 /// \param[in] model The returned histogram will be constructed using this as a model.
2167 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2168 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
2169 /// \return the N-dimensional histogram wrapped in a RResultPtr.
2170 ///
2171 /// This action is *lazy*: upon invocation of this method the calculation is
2172 /// booked but not executed. Also see RResultPtr.
2173 ///
2174 /// ### Example usage:
2175 /// ~~~{.cpp}
2176 /// auto myFilledObj = myDf.HistoND({"name","title", 4,
2177 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2178 /// {"col0", "col1", "col2", "col3"});
2179 /// ~~~
2180 ///
2181 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
2182 {
2183 std::shared_ptr<::THnD> h(nullptr);
2184 {
2185 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2186 h = model.GetHistogram();
2187
2188 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2189 h->Sumw2();
2190 } else if (int(columnList.size()) != h->GetNdimensions()) {
2191 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2192 }
2193 }
2194 return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h, fProxiedPtr,
2195 columnList.size());
2196 }
2197
2198 ////////////////////////////////////////////////////////////////////////////
2199 /// \brief Fill and return a TGraph object (*lazy action*).
2200 /// \tparam X The type of the column used to fill the x axis.
2201 /// \tparam Y The type of the column used to fill the y axis.
2202 /// \param[in] x The name of the column that will fill the x axis.
2203 /// \param[in] y The name of the column that will fill the y axis.
2204 /// \return the TGraph wrapped in a RResultPtr.
2205 ///
2206 /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph
2207 /// is filled with each one of the elements of the container.
2208 /// If Multithreading is enabled, the order in which points are inserted is undefined.
2209 /// If the graph is to be drawn, it is suggested to sort it by x first.
2210 /// The TGraph is given a name and a title based on the input column names.
2211 ///
2212 /// This action is *lazy*: upon invocation of this method the calculation is
2213 /// booked but not executed. Also see RResultPtr.
2214 ///
2215 /// ### Example usage:
2216 /// ~~~{.cpp}
2217 /// // Deduce column types (this invocation needs jitting internally)
2218 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
2219 /// // Explicit column types
2220 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
2221 /// ~~~
2222 ///
2223 /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory
2224 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2225 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2226 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
2227 RResultPtr<::TGraph> Graph(std::string_view x = "", std::string_view y = "")
2228 {
2229 auto graph = std::make_shared<::TGraph>();
2230 const std::vector<std::string_view> columnViews = {x, y};
2231 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2232 ? ColumnNames_t()
2233 : ColumnNames_t(columnViews.begin(), columnViews.end());
2234
2235 const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
2236
2237 // We build a default name and title based on the input columns
2238 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2239 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2240 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2241 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2242 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2243
2244 return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph, fProxiedPtr);
2245 }
2246
2247 ////////////////////////////////////////////////////////////////////////////
2248 /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*).
2249 /// \param[in] x The name of the column that will fill the x axis.
2250 /// \param[in] y The name of the column that will fill the y axis.
2251 /// \param[in] exl The name of the column of X low errors
2252 /// \param[in] exh The name of the column of X high errors
2253 /// \param[in] eyl The name of the column of Y low errors
2254 /// \param[in] eyh The name of the column of Y high errors
2255 /// \return the TGraphAsymmErrors wrapped in a RResultPtr.
2256 ///
2257 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
2258 /// is filled with each one of the elements of the container.
2259 /// If Multithreading is enabled, the order in which points are inserted is undefined.
2260 ///
2261 /// This action is *lazy*: upon invocation of this method the calculation is
2262 /// booked but not executed. Also see RResultPtr.
2263 ///
2264 /// ### Example usage:
2265 /// ~~~{.cpp}
2266 /// // Deduce column types (this invocation needs jitting internally)
2267 /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2268 /// // Explicit column types
2269 /// using f = float;
2270 /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2271 /// ~~~
2272 ///
2273 /// \note Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory
2274 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2275 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2276 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType,
2277 typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType,
2278 typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType>
2280 GraphAsymmErrors(std::string_view x = "", std::string_view y = "", std::string_view exl = "",
2281 std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "")
2282 {
2283 auto graph = std::make_shared<::TGraphAsymmErrors>();
2284 const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh};
2285 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2286 ? ColumnNames_t()
2287 : ColumnNames_t(columnViews.begin(), columnViews.end());
2288
2289 const auto validatedColumns = GetValidatedColumnNames(6, userColumns);
2290
2291 // We build a default name and title based on the input columns
2292 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2293 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2294 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2295 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2296 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2297
2298 return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph,
2300 }
2301
2302 ////////////////////////////////////////////////////////////////////////////
2303 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2304 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2305 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2306 /// \param[in] model The model to be considered to build the new return value.
2307 /// \param[in] v1Name The name of the column that will fill the x axis.
2308 /// \param[in] v2Name The name of the column that will fill the y axis.
2309 /// \return the monodimensional profile wrapped in a RResultPtr.
2310 ///
2311 /// This action is *lazy*: upon invocation of this method the calculation is
2312 /// booked but not executed. Also see RResultPtr.
2313 ///
2314 /// ### Example usage:
2315 /// ~~~{.cpp}
2316 /// // Deduce column types (this invocation needs jitting internally)
2317 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2318 /// // Explicit column types
2319 /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2320 /// ~~~
2321 ///
2322 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2323 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2324 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2325 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2327 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2328 {
2329 std::shared_ptr<::TProfile> h(nullptr);
2330 {
2331 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2332 h = model.GetProfile();
2333 }
2334
2335 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2336 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
2337 }
2338 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2339 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2340 ? ColumnNames_t()
2341 : ColumnNames_t(columnViews.begin(), columnViews.end());
2342 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h, fProxiedPtr);
2343 }
2344
2345 ////////////////////////////////////////////////////////////////////////////
2346 /// \brief Fill and return a weighted one-dimensional profile (*lazy action*).
2347 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2348 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2349 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
2350 /// \param[in] model The model to be considered to build the new return value.
2351 /// \param[in] v1Name The name of the column that will fill the x axis.
2352 /// \param[in] v2Name The name of the column that will fill the y axis.
2353 /// \param[in] wName The name of the column that will provide the weights.
2354 /// \return the monodimensional profile wrapped in a RResultPtr.
2355 ///
2356 /// This action is *lazy*: upon invocation of this method the calculation is
2357 /// booked but not executed. Also see RResultPtr.
2358 ///
2359 /// ### Example usage:
2360 /// ~~~{.cpp}
2361 /// // Deduce column types (this invocation needs jitting internally)
2362 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
2363 /// // Explicit column types
2364 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
2365 /// "xValues", "yValues", "weight");
2366 /// ~~~
2367 ///
2368 /// See the first Profile1D() overload for more details.
2369 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2370 typename W = RDFDetail::RInferredType>
2372 Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2373 {
2374 std::shared_ptr<::TProfile> h(nullptr);
2375 {
2376 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2377 h = model.GetProfile();
2378 }
2379
2380 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2381 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
2382 }
2383 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2384 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2385 ? ColumnNames_t()
2386 : ColumnNames_t(columnViews.begin(), columnViews.end());
2387 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
2388 }
2389
2390 ////////////////////////////////////////////////////////////////////////////
2391 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2392 /// See the first Profile1D() overload for more details.
2393 template <typename V1, typename V2, typename W>
2395 {
2396 return Profile1D<V1, V2, W>(model, "", "", "");
2397 }
2398
2399 ////////////////////////////////////////////////////////////////////////////
2400 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2401 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2402 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2403 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2404 /// \param[in] model The returned profile will be constructed using this as a model.
2405 /// \param[in] v1Name The name of the column that will fill the x axis.
2406 /// \param[in] v2Name The name of the column that will fill the y axis.
2407 /// \param[in] v3Name The name of the column that will fill the z axis.
2408 /// \return the bidimensional profile wrapped in a RResultPtr.
2409 ///
2410 /// This action is *lazy*: upon invocation of this method the calculation is
2411 /// booked but not executed. Also see RResultPtr.
2412 ///
2413 /// ### Example usage:
2414 /// ~~~{.cpp}
2415 /// // Deduce column types (this invocation needs jitting internally)
2416 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2417 /// "xValues", "yValues", "zValues");
2418 /// // Explicit column types
2419 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2420 /// "xValues", "yValues", "zValues");
2421 /// ~~~
2422 ///
2423 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2424 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2425 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2426 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2427 typename V3 = RDFDetail::RInferredType>
2428 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "",
2429 std::string_view v2Name = "", std::string_view v3Name = "")
2430 {
2431 std::shared_ptr<::TProfile2D> h(nullptr);
2432 {
2433 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2434 h = model.GetProfile();
2435 }
2436
2437 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2438 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2439 }
2440 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2441 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2442 ? ColumnNames_t()
2443 : ColumnNames_t(columnViews.begin(), columnViews.end());
2444 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
2445 }
2446
2447 ////////////////////////////////////////////////////////////////////////////
2448 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2449 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2450 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2451 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2452 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2453 /// \param[in] model The returned histogram will be constructed using this as a model.
2454 /// \param[in] v1Name The name of the column that will fill the x axis.
2455 /// \param[in] v2Name The name of the column that will fill the y axis.
2456 /// \param[in] v3Name The name of the column that will fill the z axis.
2457 /// \param[in] wName The name of the column that will provide the weights.
2458 /// \return the bidimensional profile wrapped in a RResultPtr.
2459 ///
2460 /// This action is *lazy*: upon invocation of this method the calculation is
2461 /// booked but not executed. Also see RResultPtr.
2462 ///
2463 /// ### Example usage:
2464 /// ~~~{.cpp}
2465 /// // Deduce column types (this invocation needs jitting internally)
2466 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2467 /// "xValues", "yValues", "zValues", "weight");
2468 /// // Explicit column types
2469 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2470 /// "xValues", "yValues", "zValues", "weight");
2471 /// ~~~
2472 ///
2473 /// See the first Profile2D() overload for more details.
2474 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2475 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2476 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name,
2477 std::string_view v3Name, std::string_view wName)
2478 {
2479 std::shared_ptr<::TProfile2D> h(nullptr);
2480 {
2481 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2482 h = model.GetProfile();
2483 }
2484
2485 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2486 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2487 }
2488 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2489 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2490 ? ColumnNames_t()
2491 : ColumnNames_t(columnViews.begin(), columnViews.end());
2492 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
2493 }
2494
2495 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2496 /// See the first Profile2D() overload for more details.
2497 template <typename V1, typename V2, typename V3, typename W>
2499 {
2500 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
2501 }
2502
2503 ////////////////////////////////////////////////////////////////////////////
2504 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
2505 ///
2506 /// Type T must provide at least:
2507 /// - a copy-constructor
2508 /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
2509 /// (these types can also be passed as template parameters to this method)
2510 /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
2511 /// objects passed as argument into the object on which `Merge` was called (an analogous of TH1::Merge). Note that
2512 /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
2513 /// the TCollection*).
2514 ///
2515 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
2516 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
2517 /// \tparam T The type of the object to fill. Automatically deduced.
2518 /// \param[in] model The model to be considered to build the new return value.
2519 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2520 /// \return the filled object wrapped in a RResultPtr.
2521 ///
2522 /// The user gives up ownership of the model object.
2523 /// The list of column names to be used for filling must always be specified.
2524 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
2525 /// Also see RResultPtr.
2526 ///
2527 /// ### Example usage:
2528 /// ~~~{.cpp}
2529 /// MyClass obj;
2530 /// // Deduce column types (this invocation needs jitting internally, and in this case
2531 /// // MyClass needs to be known to the interpreter)
2532 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
2533 /// // explicit column types
2534 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
2535 /// ~~~
2536 ///
2537 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
2538 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
2539 {
2540 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
2541 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
2542 throw std::runtime_error("The absence of axes limits is not supported yet.");
2543 }
2544 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h, fProxiedPtr,
2545 columnList.size());
2546 }
2547
2548 ////////////////////////////////////////////////////////////////////////////
2549 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2550 ///
2551 /// \tparam V The type of the value column
2552 /// \param[in] value The name of the column with the values to fill the statistics with.
2553 /// \return the filled TStatistic object wrapped in a RResultPtr.
2554 ///
2555 /// ### Example usage:
2556 /// ~~~{.cpp}
2557 /// // Deduce column type (this invocation needs jitting internally)
2558 /// auto stats0 = myDf.Stats("values");
2559 /// // Explicit column type
2560 /// auto stats1 = myDf.Stats<float>("values");
2561 /// ~~~
2562 ///
2563 template <typename V = RDFDetail::RInferredType>
2564 RResultPtr<TStatistic> Stats(std::string_view value = "")
2565 {
2566 ColumnNames_t columns;
2567 if (!value.empty()) {
2568 columns.emplace_back(std::string(value));
2569 }
2570 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2571 if (std::is_same<V, RDFDetail::RInferredType>::value) {
2572 return Fill(TStatistic(), validColumnNames);
2573 } else {
2574 return Fill<V>(TStatistic(), validColumnNames);
2575 }
2576 }
2577
2578 ////////////////////////////////////////////////////////////////////////////
2579 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2580 ///
2581 /// \tparam V The type of the value column
2582 /// \tparam W The type of the weight column
2583 /// \param[in] value The name of the column with the values to fill the statistics with.
2584 /// \param[in] weight The name of the column with the weights to fill the statistics with.
2585 /// \return the filled TStatistic object wrapped in a RResultPtr.
2586 ///
2587 /// ### Example usage:
2588 /// ~~~{.cpp}
2589 /// // Deduce column types (this invocation needs jitting internally)
2590 /// auto stats0 = myDf.Stats("values", "weights");
2591 /// // Explicit column types
2592 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
2593 /// ~~~
2594 ///
2595 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2596 RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight)
2597 {
2598 ColumnNames_t columns{std::string(value), std::string(weight)};
2599 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2600 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2601 const auto validColumnNames = GetValidatedColumnNames(2, columns);
2602 // We have 3 cases:
2603 // 1. Both types are inferred: we use Fill and let the jit kick in.
2604 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
2605 // 3. Both types are explicit: we invoke the fully compiled Fill method.
2606 if (vIsInferred && wIsInferred) {
2607 return Fill(TStatistic(), validColumnNames);
2608 } else if (vIsInferred != wIsInferred) {
2609 std::string error("The ");
2610 error += vIsInferred ? "value " : "weight ";
2611 error += "column type is explicit, while the ";
2612 error += vIsInferred ? "weight " : "value ";
2613 error += " is specified to be inferred. This case is not supported: please specify both types or none.";
2614 throw std::runtime_error(error);
2615 } else {
2616 return Fill<V, W>(TStatistic(), validColumnNames);
2617 }
2618 }
2619
2620 ////////////////////////////////////////////////////////////////////////////
2621 /// \brief Return the minimum of processed column values (*lazy action*).
2622 /// \tparam T The type of the branch/column.
2623 /// \param[in] columnName The name of the branch/column to be treated.
2624 /// \return the minimum value of the selected column wrapped in a RResultPtr.
2625 ///
2626 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2627 /// template specialization of this method.
2628 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2629 ///
2630 /// This action is *lazy*: upon invocation of this method the calculation is
2631 /// booked but not executed. Also see RResultPtr.
2632 ///
2633 /// ### Example usage:
2634 /// ~~~{.cpp}
2635 /// // Deduce column type (this invocation needs jitting internally)
2636 /// auto minVal0 = myDf.Min("values");
2637 /// // Explicit column type
2638 /// auto minVal1 = myDf.Min<double>("values");
2639 /// ~~~
2640 ///
2641 template <typename T = RDFDetail::RInferredType>
2642 RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "")
2643 {
2644 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2645 using RetType_t = RDFDetail::MinReturnType_t<T>;
2646 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
2647 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV, fProxiedPtr);
2648 }
2649
2650 ////////////////////////////////////////////////////////////////////////////
2651 /// \brief Return the maximum of processed column values (*lazy action*).
2652 /// \tparam T The type of the branch/column.
2653 /// \param[in] columnName The name of the branch/column to be treated.
2654 /// \return the maximum value of the selected column wrapped in a RResultPtr.
2655 ///
2656 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2657 /// template specialization of this method.
2658 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2659 ///
2660 /// This action is *lazy*: upon invocation of this method the calculation is
2661 /// booked but not executed. Also see RResultPtr.
2662 ///
2663 /// ### Example usage:
2664 /// ~~~{.cpp}
2665 /// // Deduce column type (this invocation needs jitting internally)
2666 /// auto maxVal0 = myDf.Max("values");
2667 /// // Explicit column type
2668 /// auto maxVal1 = myDf.Max<double>("values");
2669 /// ~~~
2670 ///
2671 template <typename T = RDFDetail::RInferredType>
2672 RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "")
2673 {
2674 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2675 using RetType_t = RDFDetail::MaxReturnType_t<T>;
2676 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
2677 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV, fProxiedPtr);
2678 }
2679
2680 ////////////////////////////////////////////////////////////////////////////
2681 /// \brief Return the mean of processed column values (*lazy action*).
2682 /// \tparam T The type of the branch/column.
2683 /// \param[in] columnName The name of the branch/column to be treated.
2684 /// \return the mean value of the selected column wrapped in a RResultPtr.
2685 ///
2686 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2687 /// template specialization of this method.
2688 ///
2689 /// This action is *lazy*: upon invocation of this method the calculation is
2690 /// booked but not executed. Also see RResultPtr.
2691 ///
2692 /// ### Example usage:
2693 /// ~~~{.cpp}
2694 /// // Deduce column type (this invocation needs jitting internally)
2695 /// auto meanVal0 = myDf.Mean("values");
2696 /// // Explicit column type
2697 /// auto meanVal1 = myDf.Mean<double>("values");
2698 /// ~~~
2699 ///
2700 template <typename T = RDFDetail::RInferredType>
2701 RResultPtr<double> Mean(std::string_view columnName = "")
2702 {
2703 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2704 auto meanV = std::make_shared<double>(0);
2705 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV, fProxiedPtr);
2706 }
2707
2708 ////////////////////////////////////////////////////////////////////////////
2709 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
2710 /// \tparam T The type of the branch/column.
2711 /// \param[in] columnName The name of the branch/column to be treated.
2712 /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
2713 ///
2714 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2715 /// template specialization of this method.
2716 ///
2717 /// This action is *lazy*: upon invocation of this method the calculation is
2718 /// booked but not executed. Also see RResultPtr.
2719 ///
2720 /// ### Example usage:
2721 /// ~~~{.cpp}
2722 /// // Deduce column type (this invocation needs jitting internally)
2723 /// auto stdDev0 = myDf.StdDev("values");
2724 /// // Explicit column type
2725 /// auto stdDev1 = myDf.StdDev<double>("values");
2726 /// ~~~
2727 ///
2728 template <typename T = RDFDetail::RInferredType>
2729 RResultPtr<double> StdDev(std::string_view columnName = "")
2730 {
2731 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2732 auto stdDeviationV = std::make_shared<double>(0);
2733 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV, fProxiedPtr);
2734 }
2735
2736 // clang-format off
2737 ////////////////////////////////////////////////////////////////////////////
2738 /// \brief Return the sum of processed column values (*lazy action*).
2739 /// \tparam T The type of the branch/column.
2740 /// \param[in] columnName The name of the branch/column.
2741 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
2742 /// \return the sum of the selected column wrapped in a RResultPtr.
2743 ///
2744 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2745 /// template specialization of this method.
2746 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2747 ///
2748 /// This action is *lazy*: upon invocation of this method the calculation is
2749 /// booked but not executed. Also see RResultPtr.
2750 ///
2751 /// ### Example usage:
2752 /// ~~~{.cpp}
2753 /// // Deduce column type (this invocation needs jitting internally)
2754 /// auto sum0 = myDf.Sum("values");
2755 /// // Explicit column type
2756 /// auto sum1 = myDf.Sum<double>("values");
2757 /// ~~~
2758 ///
2759 template <typename T = RDFDetail::RInferredType>
2761 Sum(std::string_view columnName = "",
2762 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2763 {
2764 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2765 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
2766 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV, fProxiedPtr);
2767 }
2768 // clang-format on
2769
2770 ////////////////////////////////////////////////////////////////////////////
2771 /// \brief Gather filtering statistics.
2772 /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2773 ///
2774 /// Calling `Report` on the main `RDataFrame` object gathers stats for
2775 /// all named filters in the call graph. Calling this method on a
2776 /// stored chain state (i.e. a graph node different from the first) gathers
2777 /// the stats for all named filters in the chain section between the original
2778 /// `RDataFrame` and that node (included). Stats are gathered in the same
2779 /// order as the named filters have been added to the graph.
2780 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
2781 /// effects cuts had.
2782 ///
2783 /// This action is *lazy*: upon invocation of
2784 /// this method the calculation is booked but not executed. See RResultPtr
2785 /// documentation.
2786 ///
2787 /// ### Example usage:
2788 /// ~~~{.cpp}
2789 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
2790 /// auto cutReport = filtered.Report();
2791 /// cutReport->Print();
2792 /// ~~~
2793 ///
2795 {
2796 bool returnEmptyReport = false;
2797 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
2798 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
2799 // certainly does not contain named filters.
2800 // The number 4 takes into account the implicit columns for entry and slot number
2801 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
2802 if (std::is_same<Proxied, RLoopManager>::value && fColRegister.GenerateColumnNames().size() > 4)
2803 returnEmptyReport = true;
2804
2805 auto rep = std::make_shared<RCutFlowReport>();
2806 using Helper_t = RDFInternal::ReportHelper<Proxied>;
2808
2809 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr.get(), returnEmptyReport), ColumnNames_t({}),
2811
2812 return MakeResultPtr(rep, *fLoopManager, std::move(action));
2813 }
2814
2815 /// \brief Returns the names of the filters created.
2816 /// \return the container of filters names.
2817 ///
2818 /// If called on a root node, all the filters in the computation graph will
2819 /// be printed. For any other node, only the filters upstream of that node.
2820 /// Filters without a name are printed as "Unnamed Filter"
2821 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2822 ///
2823 /// ### Example usage:
2824 /// ~~~{.cpp}
2825 /// auto filtNames = d.GetFilterNames();
2826 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
2827 /// ~~~
2828 ///
2829 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
2830
2831 // clang-format off
2832 ////////////////////////////////////////////////////////////////////////////
2833 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2834 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2835 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2836 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2837 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2838 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2839 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2840 /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted)
2841 /// \return the result of the aggregation wrapped in a RResultPtr.
2842 ///
2843 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2844 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2845 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2846 /// the value of the column columnName.
2847 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2848 /// Otherwise the signature of aggregator must be `void(U&,T)`.
2849 ///
2850 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2851 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2852 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2853 ///
2854 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2855 ///
2856 /// Example usage:
2857 /// ~~~{.cpp}
2858 /// auto aggregator = [](double acc, double x) { return acc * x; };
2859 /// ROOT::EnableImplicitMT();
2860 /// // If multithreading is enabled, the aggregator function will be called by multiple threads
2861 /// // and will produce a vector of partial accumulators.
2862 /// // The merger function performs the final aggregation of these partial results.
2863 /// auto merger = [](std::vector<double> &accumulators) {
2864 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2865 /// accumulators[0] *= accumulators[i];
2866 /// }
2867 /// };
2868 ///
2869 /// // The accumulator is initialized at this value by every thread.
2870 /// double initValue = 1.;
2871 ///
2872 /// // Multiplies all elements of the column "x"
2873 /// auto result = d.Aggregate(aggregator, merger, "x", initValue);
2874 /// ~~~
2875 // clang-format on
2876 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2877 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2878 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2879 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2880 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2881 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2882 {
2883 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2884 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2885
2886 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2887 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
2888
2889 auto accObjPtr = std::make_shared<U>(aggIdentity);
2890 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2892 auto action = std::make_unique<Action_t>(
2893 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2895 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2896 }
2897
2898 // clang-format off
2899 ////////////////////////////////////////////////////////////////////////////
2900 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2901 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2902 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2903 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2904 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2905 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2906 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2907 /// \return the result of the aggregation wrapped in a RResultPtr.
2908 ///
2909 /// See previous Aggregate overload for more information.
2910 // clang-format on
2911 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2912 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2913 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2914 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2915 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2916 {
2917 static_assert(
2918 std::is_default_constructible<U>::value,
2919 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2920 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2921 }
2922
2923 // clang-format off
2924 ////////////////////////////////////////////////////////////////////////////
2925 /// \brief Book execution of a custom action using a user-defined helper object.
2926 /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present.
2927 /// \tparam OtherColumns A list of the types of the other columns used by this action
2928 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2929 /// \param[in] helper The Action Helper to be scheduled.
2930 /// \param[in] columns The names of the columns on which the helper acts.
2931 /// \return the result of the helper wrapped in a RResultPtr.
2932 ///
2933 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2934 /// Helper object provided by the caller. The required interface for the helper is described below (more
2935 /// methods than the ones required can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2936 ///
2937 /// ### Mandatory interface
2938 ///
2939 /// * `Helper` must publicly inherit from `ROOT::Detail::RDF::RActionImpl<Helper>`
2940 /// * `Helper::Result_t`: public alias for the type of the result of this action helper. `Result_t` must be default-constructible.
2941 /// * `Helper(Helper &&)`: a move-constructor is required. Copy-constructors are discouraged.
2942 /// * `std::shared_ptr<Result_t> GetResultPtr() const`: return a shared_ptr to the result of this action (of type
2943 /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
2944 /// _before_ Initialize(), because the RResultPtr is constructed before the event loop is started.
2945 /// * `void Initialize()`: this method is called once before starting the event-loop. Useful for setup operations.
2946 /// It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper,
2947 /// or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
2948 /// * `void InitTask(TTreeReader *, unsigned int slot)`: each working thread shall call this method during the event
2949 /// loop, before processing a batch of entries. The pointer passed as argument, if not null, will point to the TTreeReader
2950 /// that RDataFrame has set up to read the task's batch of entries. It is passed to the helper to allow certain advanced optimizations;
2951 /// it should not usually serve any purpose for the Helper. This method is often a no-op for simple helpers.
2952 /// * `void Exec(unsigned int slot, ColumnTypes...columnValues)`: each working thread shall call this method
2953 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2954 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2955 /// the requested columns for the particular entry being processed.
2956 /// * `void Finalize()`: this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2957 /// * `std::string GetActionName()`: it returns a string identifier for this type of action that RDataFrame will use in
2958 /// diagnostics, SaveGraph(), etc.
2959 ///
2960 /// ### Optional methods
2961 ///
2962 /// If these methods are implemented they enable extra functionality as per the description below.
2963 ///
2964 /// * `Result_t &PartialUpdate(unsigned int slot)`: if present, it must return the value of the partial result of this action for the given 'slot'.
2965 /// Different threads might call this method concurrently, but will do so with different 'slot' numbers.
2966 /// RDataFrame leverages this method to implement RResultPtr::OnPartialResult().
2967 /// * `ROOT::RDF::SampleCallback_t GetSampleCallback()`: if present, it must return a callable with the
2968 /// appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing
2969 /// of every sample, as in DefinePerSample().
2970 /// * `Helper MakeNew(void *newResult)`: if implemented, it enables varying the action's result with VariationsFor(). It takes a
2971 /// type-erased new result that can be safely cast to a `std::shared_ptr<Result_t> *` (a pointer to shared pointer) and should
2972 /// be used as the action's output result.
2973 ///
2974 /// In case Book is called without specifying column types as template arguments, corresponding typed code will be just-in-time compiled
2975 /// by RDataFrame. In that case the Helper class needs to be known to the ROOT interpreter.
2976 ///
2977 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2978 ///
2979 /// ### Examples
2980 /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper.
2981 ///
2982 /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx.
2983 ///
2984 // clang-format on
// Books a custom action built around a user-provided helper object.
// NOTE(review): the line carrying the return type, the function name and the parameters (`helper`, `columns`) is
// missing from this listing, as is the declaration of the `AH` alias used in the static_assert below -- consult the
// original header before editing.
2985 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
2987 {
// Strip references and cv-qualifiers from the deduced helper type so that it can be stored by value.
2988 using HelperT = std::decay_t<Helper>;
2989 // TODO add more static sanity checks on Helper
// Compile-time check that the helper derives from, and is pointer-convertible to, AH.
2991 static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value,
2992 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2993
// Copy or move the caller's helper into shared ownership and fetch the result object it exposes.
2994 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper));
2995 auto resPtr = hPtr->GetResultPtr();
2996
// Neither column types nor column names were given: take the column-less path, which throws at runtime if the
// helper's Exec cannot be called with just a slot number. Otherwise build the action for the requested columns.
2997 if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) {
2998 return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
2999 } else {
3000 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
3001 fProxiedPtr, columns.size());
3002 }
3003 }
3004
3005 ////////////////////////////////////////////////////////////////////////////
3006 /// \brief Provides a representation of the columns in the dataset.
3007 /// \tparam ColumnTypes variadic list of branch/column types.
3008 /// \param[in] columnList Names of the columns to be displayed.
3009 /// \param[in] nRows Number of events for each column to be displayed.
3010 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3011 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3012 ///
3013 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
3014 /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will
3015 /// return a complete version through `RDisplay::AsString()`.
3016 ///
3017 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
3018 /// RResultPtr.
3019 ///
3020 /// Example usage:
3021 /// ~~~{.cpp}
3022 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
3023 /// auto d1 = rdf.Display("");
3024 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
3025 /// auto d2 = rdf.Display({"x", "y"}, 128);
3026 /// // Printing the short representations, the event loop will run
3027 /// d1->Print();
3028 /// d2->Print();
3029 /// ~~~
3030 template <typename... ColumnTypes>
3031 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3032 {
3033 CheckIMTDisabled("Display");
3034 auto newCols = columnList;
3035 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3036 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3037 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3038 // Need to add ULong64_t type corresponding to the first column rdfentry_
3039 return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>(
3040 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr);
3041 }
3042
3043 ////////////////////////////////////////////////////////////////////////////
3044 /// \brief Provides a representation of the columns in the dataset.
3045 /// \param[in] columnList Names of the columns to be displayed.
3046 /// \param[in] nRows Number of events for each column to be displayed.
3047 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3048 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3049 ///
3050 /// This overload automatically infers the column types.
3051 /// See the previous overloads for further details.
3052 ///
3053 /// Invoked when no types are specified to Display
3054 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3055 {
3056 CheckIMTDisabled("Display");
3057 auto newCols = columnList;
3058 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3059 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3060 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3061 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(
3062 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr,
3063 columnList.size() + 1);
3064 }
3065
3066 ////////////////////////////////////////////////////////////////////////////
3067 /// \brief Provides a representation of the columns in the dataset.
3068 /// \param[in] columnNameRegexp A regular expression to select the columns.
3069 /// \param[in] nRows Number of events for each column to be displayed.
3070 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3071 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3072 ///
3073 /// The existing columns are matched against the regular expression. If the string provided
3074 /// is empty, all columns are selected.
3075 /// See the previous overloads for further details.
// NOTE(review): the line carrying this overload's return type is missing from this listing.
3077 Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10)
3078 {
// Match the regular expression against the available column names, then forward the selection to a Display
// overload that takes a list of column names.
3079 const auto columnNames = GetColumnNames();
3080 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
3081 return Display(selectedColumns, nRows, nMaxCollectionElements);
3082 }
3083
3084 ////////////////////////////////////////////////////////////////////////////
3085 /// \brief Provides a representation of the columns in the dataset.
3086 /// \param[in] columnList Names of the columns to be displayed.
3087 /// \param[in] nRows Number of events for each column to be displayed.
3088 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3089 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3090 ///
3091 /// See the previous overloads for further details.
// NOTE(review): the line carrying this overload's return type is missing from this listing.
3093 Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3094 {
// Materialise the braced list as a ColumnNames_t and forward to a Display overload taking a list of column names.
3095 ColumnNames_t selectedColumns(columnList);
3096 return Display(selectedColumns, nRows, nMaxCollectionElements);
3097 }
3098
3099private:
// Implementation of column definition for callables whose return type is default-constructible: validates the input
// columns, wraps `expression` in an RDefine and returns a new RInterface built on the same node and loop manager.
// `where` is the name of the calling method; it is used to tell Redefine-like callers apart from the others.
// NOTE(review): several lines of this function are missing from this listing (the bodies of the two branches
// right below, the tail of the std::make_shared call and the declaration of `newCols`) -- consult the original header
// before editing.
3100 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
3101 std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>
3102 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
3103 {
3104 if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
3108 } else {
3112 }
3113
// Derive the types of the input columns from the callable's argument list, dropping the leading slot argument
// (Slot flavour) or the leading slot and entry arguments (SlotAndEntry flavour).
3114 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
3115 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
3116 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::Slot>::value, ArgTypes_t>::type;
3117 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
3118 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::SlotAndEntry>::value, ColTypesTmp_t>::type;
3119
3120 constexpr auto nColumns = ColTypes_t::list_size;
3121
3122 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
3123 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
3124
3125 // Declare return type to the interpreter, for future use by jitted actions
3126 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3127 if (retTypeName.empty()) {
3128 // The type is not known to the interpreter.
3129 // We must not error out here, but rather if/when this column is used in jitted code,
// so a deliberately broken but telling type name is stored instead.
3130 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3131 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3132 }
3133
3134 using NewCol_t = RDFDetail::RDefine<F, DefineType>;
3135 auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
3137
3139 newCols.AddDefine(std::move(newColumn));
3140
3141 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
3142
3143 return newInterface;
3144 }
3145
3146 // This overload is chosen when the callable passed to Define or DefineSlot is not convertible to std::string and
// returns a type that is not default-constructible (e.g. void).
3147 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
3148 // this way compilation of `Define` has no way to continue after throwing the error.
3149 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
3150 bool IsFStringConv = std::is_convertible<F, std::string>::value,
3151 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
3152 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
3153 DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &)
3154 {
// The condition is false for every instantiation that selects this overload with the default template arguments,
// so instantiating the body always produces the diagnostic below.
3155 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
3156 "Error in `Define`: type returned by expression is not default-constructible");
3157 return *this; // never reached
3158 }
3159
// Implementation of Snapshot for explicitly typed columns: books a Snapshot action writing `columnList` to the tree
// `fullTreeName` in file `filename`, and returns a result pointer to an RInterface set up on that output tree.
// NOTE(review): some lines of this function are missing from this listing (after the computation of `validCols` and
// after the construction of `snapHelperArgs`) -- consult the original header before editing.
3160 template <typename... ColumnTypes>
3161 RResultPtr<RInterface<RLoopManager>> SnapshotImpl(std::string_view fullTreeName, std::string_view filename,
3162 const ColumnNames_t &columnList, const RSnapshotOptions &options)
3163 {
3164 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3165
// The number of explicit column types must be compatible with the number of columns left after filtering.
3166 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size());
3167 // validCols has aliases resolved, while columnListWithoutSizeColumns still has aliases in it.
3168 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
3171
// Split the full tree path into its directory and tree-name components.
3172 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
3173 const auto &treename = parsedTreePath.fTreeName;
3174 const auto &dirname = parsedTreePath.fDirName;
3175
3176 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
3177 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
3178
3180
3181 // The CreateLMFromTTree function by default opens the file passed as input
3182 // to check for the presence of the TTree inside. But at this moment the
3183 // filename we are using here corresponds to a file which does not exist yet,
3184 // i.e. the output file of the Snapshot call. Thus, checkFile=false will
3185 // prevent the function from trying to open a non-existent file.
3186 auto newRDF = std::make_shared<RInterface<RLoopManager>>(ROOT::Detail::RDF::CreateLMFromTTree(
3187 fullTreeName, filename, /*defaultColumns=*/columnListWithoutSizeColumns, /*checkFile=*/false));
3188
3189 // The Snapshot helper will use validCols (with aliases resolved) as input columns, and
3190 // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column names.
3191 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs,
3192 fProxiedPtr);
3193
// Non-lazy snapshot: dereference the result right away.
// NOTE(review): presumably accessing the RResultPtr is what runs the event loop -- confirm against RResultPtr.
3194 if (!options.fLazy)
3195 *resPtr;
3196 return resPtr;
3197 }
3198
3199 ////////////////////////////////////////////////////////////////////////////
3200 /// \brief Implementation of cache.
3201 template <typename... ColTypes, std::size_t... S>
3202 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
3203 {
3204 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3205
3206 // Check at compile time that the columns types are copy constructible
3207 constexpr bool areCopyConstructible =
3208 RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value;
3209 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
3210
3211 RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
3212
3213 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
3214 auto ds = std::make_unique<RLazyDS<ColTypes...>>(
3215 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
3216
3217 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
3218
3219 return cachedRDF;
3220 }
3221
// Implementation of Vary for compiled callables: validates the request, wraps `expression` in an RVariation and
// returns a new RInterface built on the same node and loop manager.
// NOTE(review): the line carrying the return type and the declaration of `newCols` are missing from this listing --
// consult the original header before editing.
3222 template <bool IsSingleColumn, typename F>
3224 VaryImpl(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
3225 const std::vector<std::string> &variationTags, std::string_view variationName)
3226 {
// Argument and return types are extracted from the decayed callable type.
3227 using F_t = std::decay_t<F>;
3228 using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types;
3229 using RetType = typename TTraits::CallableTraits<F_t>::ret_type;
3230 constexpr auto nColumns = ColTypes_t::list_size;
3231
3232 SanityChecksForVary<RetType>(colNames, variationTags, variationName);
3233
3234 const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns);
3235 CheckAndFillDSColumns(validColumnNames, ColTypes_t{});
3236
3237 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3238 if (retTypeName.empty()) {
3239 // The type is not known to the interpreter, but we don't want to error out
3240 // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name.
3241 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3242 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3243 }
3244
3245 auto variation = std::make_shared<RDFInternal::RVariation<F_t, IsSingleColumn>>(
3246 colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager,
3247 validColumnNames);
3248
3250 newCols.AddVariation(std::move(variation));
3251
3252 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
3253
3254 return newInterface;
3255 }
3256
// Implementation of Vary for string expressions: checks the request, books a just-in-time compiled variation and
// returns a new RInterface built on the same node and loop manager.
// NOTE(review): the tail of the CheckForDefinition call and the declaration of `newColRegister` are missing from this
// listing -- consult the original header before editing.
3257 RInterface<Proxied, DS_t> JittedVaryImpl(const std::vector<std::string> &colNames, std::string_view expression,
3258 const std::vector<std::string> &variationTags,
3259 std::string_view variationName, bool isSingleColumn)
3260 {
3261 R__ASSERT(!variationTags.empty() && "Must have at least one variation.");
3262 R__ASSERT(!colNames.empty() && "Must have at least one varied column.");
3263 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
3264
// Every varied column name goes through the valid-C++-name check and the definition check.
3265 for (auto &colName : colNames) {
3266 RDFInternal::CheckValidCppVarName(colName, "Vary");
3267 RDFInternal::CheckForDefinition("Vary", colName, fColRegister, fLoopManager->GetBranchNames(),
3269 }
3270 RDFInternal::CheckValidCppVarName(variationName, "Vary");
3271
3272 // when varying multiple columns, they must be different columns
3273 if (colNames.size() > 1) {
3274 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
3275 if (uniqueCols.size() != colNames.size())
3276 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
3277 }
3278
// The upcast node is passed on to BookVariationJit together with the expression and the column register.
3279 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
3280 auto jittedVariation =
3281 RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager, fDataSource,
3282 fColRegister, fLoopManager->GetBranchNames(), upcastNodeOnHeap, isSingleColumn);
3283
3285 newColRegister.AddVariation(std::move(jittedVariation));
3286
3287 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister));
3288
3289 return newInterface;
3290 }
3291
// Books a column-less Book action. The trailing return type makes this overload participate in overload resolution
// only if `hPtr->Exec(0u)` is well-formed, i.e. if the helper's Exec can be called with just a slot number.
// NOTE(review): the line declaring the third (unnamed, tag-like) parameter is missing from this listing -- consult
// the original header before editing.
3292 template <typename Helper, typename ActionResultType>
3293 auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
3294 const std::shared_ptr<Helper> &hPtr,
3296 -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
3297 {
// Empty column list and an explicit column count of zero.
3298 return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, fProxiedPtr, 0u);
3299 }
3300
3301 template <typename Helper, typename ActionResultType, typename... Others>
3302 RResultPtr<ActionResultType>
3303 CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
3304 const std::shared_ptr<Helper>& /*hPtr*/,
3305 Others...)
3306 {
3307 throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
3308 "columns! The action helper type was ") +
3309 typeid(Helper).name());
3310 return {};
3311 }
3312
3313protected:
3314 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
3315 const RDFInternal::RColumnRegister &colRegister)
3316 : RInterfaceBase(lm, colRegister), fProxiedPtr(proxied)
3317 {
3318 }
3319
3320 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
3321};
3322
3323} // namespace RDF
3324
3325} // namespace ROOT
3326
3327#endif // ROOT_RDF_TINTERFACE
#define f(i)
Definition RSha256.hxx:104
#define h(i)
Definition RSha256.hxx:106
unsigned int UInt_t
Definition RtypesCore.h:46
long long Long64_t
Definition RtypesCore.h:69
unsigned long long ULong64_t
Definition RtypesCore.h:70
#define X(type, name)
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
constexpr Int_t kError
Definition TError.h:47
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
Base class for action helpers, see RInterface::Book() for more information.
implementation of FilterAvailable and FilterMissing operations
The head node of a RDF computation graph.
Helper class that provides the operation graph nodes.
A RDataFrame node that produces a result.
Definition RAction.hxx:53
A binder for user-defined columns, variations and aliases.
std::vector< std::string_view > GenerateColumnNames() const
Return the list of the names of the defined columns (Defines + Aliases).
void AddVariation(std::shared_ptr< RVariationBase > variation)
Register a new systematic variation.
void AddDefine(std::shared_ptr< RDFDetail::RDefineBase > column)
Add a new defined column.
The dataset specification for RDataFrame.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > fLoopManager
< The RLoopManager at the root of this computation graph. Never null.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which all column typ...
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
void CheckIMTDisabled(std::string_view callerName)
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
The public interface to the RDataFrame federation of classes.
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &colRegister)
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
std::enable_if_t<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > > DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for a single existing column using auto-generated variation tags.
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for a single existing column using auto-generated variation tags.
RResultPtr<::TGraph > Graph(std::string_view x="", std::string_view y="")
Fill and return a TGraph object (lazy action).
RResultPtr< ActionResultType > CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &, const std::shared_ptr< Helper > &, Others...)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot.
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action).
std::enable_if_t< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > > DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
RInterface & operator=(RInterface &&)=default
Move-assignment operator for RInterface.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
Register systematic variations for multiple existing columns using auto-generated tags.
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for a single existing column using custom variation tags.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Redefine(std::string_view name, std::string_view expression)
Overwrite the value and/or type of an existing column.
auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &resPtr, const std::shared_ptr< Helper > &hPtr, TTraits::TypeList< RDFDetail::RInferredType >) -> decltype(hPtr->Exec(0u), RResultPtr< ActionResultType >{})
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
Register systematic variations for multiple existing columns using auto-generated variation tags.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view fullTreeName, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
RInterface< Proxied, DS_t > Vary(std::initializer_list< std::string > colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for multiple existing columns using custom variation tags.
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< RLoopManager > &proxied)
Build a RInterface from a RLoopManager.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, std::string_view expression)
Define a new column that is updated when the input sample changes.
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow to refer to a column with a different name.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Redefine(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
RInterface< Proxied, DS_t > VaryImpl(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
RResultPtr< typename std::decay_t< Helper >::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< RDFDetail::RFilterWithMissingValues< Proxied >, DS_t > FilterAvailable(std::string_view column)
Discard entries with missing values.
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action).
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for a single existing column using custom variation tags.
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for multiple existing columns using custom variation tags.
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Define a new column.
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for multiple existing columns using custom variation tags.
RInterface< Proxied, DS_t > RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... >)
Implementation of cache.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
RInterface< RDFDetail::RFilterWithMissingValues< Proxied >, DS_t > FilterMissing(std::string_view column)
Keep only the entries that have missing values.
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action).
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RInterface< Proxied, DS_t > JittedVaryImpl(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName, bool isSingleColumn)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action).
RInterface< Proxied, DS_t > Vary(std::initializer_list< std::string > colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
Register systematic variations for multiple existing columns using auto-generated variation tags.
RResultPtr< std::decay_t< T > > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RInterface< Proxied, DS_t > RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action).
RResultPtr<::TGraphAsymmErrors > GraphAsymmErrors(std::string_view x="", std::string_view y="", std::string_view exl="", std::string_view exh="", std::string_view eyl="", std::string_view eyh="")
Fill and return a TGraphAsymmErrors object (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot and the current entry.
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action).
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
Fill and return a two-dimensional profile (lazy action).
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action).
RInterface< Proxied, DS_t > DefaultValueFor(std::string_view column, const T &defaultValue)
In case the value in the given column is missing, provide a default value.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action).
RInterface< Proxied, DS_t > Vary(std::initializer_list< std::string > colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
Register systematic variations for multiple existing columns using auto-generated variation tags.
An RDataSource implementation which is built on top of result proxies.
Smart pointer for the return type of actions.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, ...
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
TDirectory::TContext keeps track of and restores the current directory.
Definition TDirectory.h:89
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
Statistical variable, defined by its mean and variance (RMS).
Definition TStatistic.h:33
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
#define F(x, y, z)
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > CreateLMFromTTree(std::string_view datasetName, std::string_view fileNameGlob, const std::vector< std::string > &defaultColumns, bool checkFile=true)
Create an RLoopManager that reads a TChain.
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair< ULong64_t, ULong64_t > &&newRange)
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
void ChangeSpec(const ROOT::RDF::RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec)
Changes the input dataset specification of an RDataFrame.
void RemoveDuplicates(ColumnNames_t &columnNames)
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:119
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string GetDataSourceLabel(const ROOT::RDF::RNode &node)
std::string PrettyPrintAddr(const void *const addr)
void TriggerRun(ROOT::RDF::RNode node)
Trigger the execution of an RDataFrame computation graph.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
std::string DemangleTypeIdName(const std::type_info &typeInfo)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap, bool isSingleColumn)
Book the jitting of a Vary call.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(const std::vector< std::string > &branches, TTree *tree, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end)
std::vector< std::string > GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
std::vector< std::string > ColumnNames_t
ROOT type_traits extensions.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:539
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:570
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition TROOT.cxx:556
Definition graph.py:1
type is TypeList if MustRemove is false, otherwise it is a TypeList with the first type removed
Definition Utils.hxx:141
A collection of options to steer the creation of the dataset on file.
bool fLazy
Do not start the event loop when Snapshot is called.
A struct which stores the parameters of a TH1D.
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores the parameters of a TH2D.
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores the parameters of a TH3D.
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores the parameters of a THnD.
std::shared_ptr<::THnD > GetHistogram() const
A struct which stores the parameters of a TProfile.
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores the parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.