Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/RDataSource.hxx"
19#include "ROOT/RDF/RDefine.hxx"
21#include "ROOT/RDF/RFilter.hxx"
26#include "ROOT/RDF/RRange.hxx"
27#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/RResultPtr.hxx"
32#include "ROOT/RStringView.hxx"
33#include "ROOT/RVec.hxx"
34#include "ROOT/TypeTraits.hxx"
35#include "RtypesCore.h" // for ULong64_t
36#include "TDirectory.h"
37#include "TH1.h" // For Histo actions
38#include "TH2.h" // For Histo actions
39#include "TH3.h" // For Histo actions
40#include "THn.h"
41#include "TProfile.h"
42#include "TProfile2D.h"
43#include "TStatistic.h"
44
45#include <algorithm>
46#include <cstddef>
47#include <initializer_list>
48#include <iterator> // std::back_inserter
49#include <limits>
50#include <memory>
51#include <set>
52#include <sstream>
53#include <stdexcept>
54#include <string>
55#include <type_traits> // is_same, enable_if
56#include <typeinfo>
57#include <unordered_set>
58#include <utility> // std::index_sequence
59#include <vector>
60
61class TGraph;
62
63// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
64namespace ROOT {
67void EnableImplicitMT(UInt_t numthreads);
68class RDataFrame;
69} // namespace ROOT
70namespace cling {
71std::string printValue(ROOT::RDataFrame *tdf);
72}
73
74namespace ROOT {
75namespace RDF {
78namespace TTraits = ROOT::TypeTraits;
79
80template <typename Proxied, typename DataSource>
81class RInterface;
82
83using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
84} // namespace RDF
85
86namespace Internal {
87namespace RDF {
89void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
92} // namespace RDF
93} // namespace Internal
94
95namespace RDF {
96
97// clang-format off
98/**
99 * \class ROOT::RDF::RInterface
100 * \ingroup dataframe
101 * \brief The public interface to the RDataFrame federation of classes.
102 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
103 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
104 *
105 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
106 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
107 */
108// clang-format on
109template <typename Proxied, typename DataSource = void>
111 using DS_t = DataSource;
115 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
117
118 template <typename T, typename W>
119 friend class RInterface;
120
122 friend void RDFInternal::ChangeEmptyEntryRange(const RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
124
125 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
126
127public:
128 ////////////////////////////////////////////////////////////////////////////
129 /// \brief Copy-assignment operator for RInterface.
/// Compiler-generated. fProxiedPtr is a std::shared_ptr, so after the assignment both objects refer to the same
/// graph node; the node itself is not duplicated.
130 RInterface &operator=(const RInterface &) = default;
131
132 ////////////////////////////////////////////////////////////////////////////
133 /// \brief Copy-ctor for RInterface.
/// Compiler-generated. The copy shares ownership of the graph node held by fProxiedPtr rather than cloning it.
134 RInterface(const RInterface &) = default;
135
136 ////////////////////////////////////////////////////////////////////////////
137 /// \brief Move-ctor for RInterface.
/// Compiler-generated member-wise move.
138 RInterface(RInterface &&) = default;
139
140 ////////////////////////////////////////////////////////////////////////////
141 /// \brief Move-assignment operator for RInterface.
143
144 ////////////////////////////////////////////////////////////////////////////
145 /// \brief Build a RInterface from a RLoopManager.
146 /// This constructor is only available for RInterface<RLoopManager>.
// SFINAE guard: T defaults to Proxied, so this constructor only takes part in overload resolution when
// Proxied is RLoopManager.
147 template <typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>>
// The same shared pointer initializes both the RInterfaceBase subobject and fProxiedPtr.
148 RInterface(const std::shared_ptr<RLoopManager> &proxied) : RInterfaceBase(proxied), fProxiedPtr(proxied)
149 {
150 }
151
152 ////////////////////////////////////////////////////////////////////////////
153 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
154 /// Different RDataFrame methods return different C++ types. All nodes, however,
155 /// can be cast to this common type at the cost of a small performance penalty.
156 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
157 /// around via (non-template, C++11) helper functions.
158 /// Example usage:
159 /// ~~~{.cpp}
160 /// // a function that conditionally adds a Range to a RDataFrame node.
161 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
162 /// {
163 /// return mustAddRange ? df.Range(1) : df;
164 /// }
165 /// // use as :
166 /// ROOT::RDataFrame df(10);
167 /// auto maybeRanged = MaybeAddRange(df, true);
168 /// ~~~
169 /// Note that it is not a problem to pass RNode's by value.
// Implicit conversion to the common node type RNode, i.e. RInterface<RNodeBase, void>.
170 operator RNode() const
171 {
// Upcast the stored node pointer to its RNodeBase base; the loop manager and the column register are
// handed to the new RNode unchanged.
172 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister);
173 }
174
175 ////////////////////////////////////////////////////////////////////////////
176 /// \brief Append a filter to the call graph.
177 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
178 /// signalling whether the event has passed the selection (true) or not (false).
179 /// \param[in] columns Names of the columns/branches in input to the filter function.
180 /// \param[in] name Optional name of this filter. See `Report`.
181 /// \return the filter node of the computation graph.
182 ///
183 /// Append a filter node at the point of the call graph corresponding to the
184 /// object this method is called on.
185 /// The callable `f` should not have side-effects (e.g. modification of an
186 /// external or static variable) to ensure correct results when implicit
187 /// multi-threading is active.
188 ///
189 /// RDataFrame only evaluates filters when necessary: if multiple filters
190 /// are chained one after another, they are executed in order and the first
191 /// one returning false causes the event to be discarded.
192 /// Even if multiple actions or transformations depend on the same filter,
193 /// it is executed once per entry. If its result is requested more than
194 /// once, the cached result is served.
195 ///
196 /// ### Example usage:
197 /// ~~~{.cpp}
198 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
199 /// auto filtered = df.Filter(myCut, {"x", "y"});
200 ///
201 /// // String: it must contain valid C++ except that column names can be used instead of variable names
202 /// auto filtered = df.Filter("x*y > 0");
203 /// ~~~
204 ///
205 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
206 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
207 /// ~~~{.cpp}
208 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
209 /// ~~~
210 /// but instead this will:
211 /// ~~~{.cpp}
212 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
213 /// ~~~
// Callable overload of Filter. The enable_if removes it from overload resolution when F is convertible to
// std::string, so that string expressions select the jitted overload instead.
// NOTE(review): the return-type line and the declaration of the F_t alias used below are not visible in this
// listing.
214 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
216 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
217 {
218 RDFInternal::CheckFilter(f);
// The number of input columns is a compile-time constant taken from the callable's argument type list.
219 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
220 constexpr auto nColumns = ColTypes_t::list_size;
221 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
222 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
223
225
// The filter node takes ownership of f (moved) and receives the current node (fProxiedPtr), the column
// register and the optional filter name; the returned interface wraps the new node.
226 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name);
227 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
228 }
229
230 ////////////////////////////////////////////////////////////////////////////
231 /// \brief Append a filter to the call graph.
232 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
233 /// signalling whether the event has passed the selection (true) or not (false).
234 /// \param[in] name Optional name of this filter. See `Report`.
235 /// \return the filter node of the computation graph.
236 ///
237 /// Refer to the first overload of this method for the full documentation.
// Overload of Filter taking a callable and a filter name.
// NOTE(review): the signature line of this overload is not visible in this listing.
238 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
240 {
241 // The SFINAE constraint is there so that a call with two strings picks the overload that accepts two
242 // strings rather than this template method. Delegate to the (f, columns, name) overload with no columns.
243 return Filter(f, {}, name);
244 }
245
246 ////////////////////////////////////////////////////////////////////////////
247 /// \brief Append a filter to the call graph.
248 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
249 /// signalling whether the event has passed the selection (true) or not (false).
250 /// \param[in] columns Names of the columns/branches in input to the filter function.
251 /// \return the filter node of the computation graph.
252 ///
253 /// Refer to the first overload of this method for the full documentation.
// Overload of Filter accepting the column names as a std::initializer_list<std::string>; it builds a
// ColumnNames_t from the list and delegates to the overload that takes ColumnNames_t.
254 template <typename F>
255 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
256 {
// f is a by-value sink parameter: move it into the delegated call instead of copying the callable a second
// time, consistently with how the ColumnNames_t overload moves its own f into the filter node.
257 return Filter(std::move(f), ColumnNames_t{columns});
258 }
259
260 ////////////////////////////////////////////////////////////////////////////
261 /// \brief Append a filter to the call graph.
262 /// \param[in] expression The filter expression in C++
263 /// \param[in] name Optional name of this filter. See `Report`.
264 /// \return the filter node of the computation graph.
265 ///
266 /// The expression is just-in-time compiled and used to filter entries. It must
267 /// be valid C++ syntax in which variable names are substituted with the names
268 /// of branches/columns.
269 ///
270 /// ### Example usage:
271 /// ~~~{.cpp}
272 /// auto filtered_df = df.Filter("myCollection.size() > 3");
273 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
274 /// ~~~
275 ///
276 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
277 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
278 /// ~~~{.cpp}
279 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
280 /// ~~~
281 /// but instead this will:
282 /// ~~~{.cpp}
283 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
284 /// ~~~
// String-expression overload of Filter; returns an interface over an RJittedFilter node.
// NOTE(review): the initializer of jittedFilter and the return statement are not visible in this listing.
285 RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "")
286 {
287 // upcastNodeOnHeap is allocated here and deleted by the jitted call to JitFilterHelper
288 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
// upcastNodeOnHeap is a raw pointer to a smart pointer: strip the pointer, then read the pointee's element_type
// to obtain the upcast node type.
289 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
290 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister);
291 const auto jittedFilter =
294
296 }
297
298 // clang-format off
299 ////////////////////////////////////////////////////////////////////////////
300 /// \brief Define a new column.
301 /// \param[in] name The name of the defined column.
302 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
303 /// \param[in] columns Names of the columns/branches in input to the producer function.
304 /// \return the first node of the computation graph for which the new quantity is defined.
305 ///
306 /// Define a column that will be visible from all subsequent nodes
307 /// of the functional chain. The `expression` is only evaluated for entries that pass
308 /// all the preceding filters.
309 /// A new variable is created called `name`, accessible as if it was contained
310 /// in the dataset from subsequent transformations/actions.
311 ///
312 /// Use cases include:
313 /// * caching the results of complex calculations for easy and efficient multiple access
314 /// * extraction of quantities of interest from complex objects
315 ///
316 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
317 ///
318 /// ### Example usage:
319 /// ~~~{.cpp}
320 /// // assuming a function with signature:
321 /// double myComplexCalculation(const RVec<float> &muon_pts);
322 /// // we can pass it directly to Define
323 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
324 /// // alternatively, we can pass the body of the function as a string, as in Filter:
325 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
326 /// ~~~
327 ///
328 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
329 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
330 /// ~~~{.cpp}
331 /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
332 /// ~~~
333 /// but instead this will:
334 /// ~~~{.cpp}
335 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
336 /// ~~~
// Callable overload of Define. The enable_if excludes any F convertible to std::string, leaving string
// expressions to the (name, expression) string overload.
337 template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
338 RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
339 {
// `expression` is received by value and moved into DefineImpl.
// NOTE(review): ExtraArgsForDefine::None presumably means no slot/entry arguments are prepended to the
// callable's inputs, and "Define" is presumably the caller name used in diagnostics -- confirm in DefineImpl.
340 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Define");
341 }
342 // clang-format on
343
344 // clang-format off
345 ////////////////////////////////////////////////////////////////////////////
346 /// \brief Define a new column with a value dependent on the processing slot.
347 /// \param[in] name The name of the defined column.
348 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
349 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
350 /// \return the first node of the computation graph for which the new quantity is defined.
351 ///
352 /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
353 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
354 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
355 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
356 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
357 ///
358 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
359 /// ~~~{.cpp}
360 /// int function(unsigned int, double, double);
361 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
362 /// df.DefineSlot("x", function, {"column1", "column2"})
363 /// ~~~
364 ///
365 /// See Define() for more information.
// Forwards to DefineImpl with the Slot tag. Unlike Define, there is no enable_if constraint on F here.
366 template <typename F>
367 RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
368 {
// `expression` is moved on. NOTE(review): ExtraArgsForDefine::Slot presumably tells DefineImpl that the
// callable's first parameter is the slot number (so `columns` excludes it) -- confirm in DefineImpl.
369 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "DefineSlot");
370 }
371 // clang-format on
372
373 // clang-format off
374 ////////////////////////////////////////////////////////////////////////////
375 /// \brief Define a new column with a value dependent on the processing slot and the current entry.
376 /// \param[in] name The name of the defined column.
377 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
378 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
379 /// \return the first node of the computation graph for which the new quantity is defined.
380 ///
381 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
382 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
383 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
384 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
385 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
386 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
387 ///
388 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
389 /// ~~~{.cpp}
390 /// int function(unsigned int, ULong64_t, double, double);
391 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
392 /// DefineSlotEntry("x", function, {"column1", "column2"})
393 /// ~~~
394 ///
395 /// See Define() for more information.
// Forwards to DefineImpl with the SlotAndEntry tag. There is no enable_if constraint on F here.
396 template <typename F>
397 RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
398 {
// `expression` is moved on. NOTE(review): ExtraArgsForDefine::SlotAndEntry presumably tells DefineImpl that
// the callable's first two parameters are the slot number and the entry number -- confirm in DefineImpl.
399 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
400 "DefineSlotEntry");
401 }
402 // clang-format on
403
404 ////////////////////////////////////////////////////////////////////////////
405 /// \brief Define a new column.
406 /// \param[in] name The name of the defined column.
407 /// \param[in] expression An expression in C++ which represents the defined value
408 /// \return the first node of the computation graph for which the new quantity is defined.
409 ///
410 /// The expression is just-in-time compiled and used to produce the column entries.
411 /// It must be valid C++ syntax in which variable names are substituted with the names
412 /// of branches/columns.
413 ///
414 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
415 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
416 /// ~~~{.cpp}
417 /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
418 /// ~~~
419 /// but instead this will:
420 /// ~~~{.cpp}
421 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
422 /// ~~~
423 ///
424 /// Refer to the first overload of this method for the full documentation.
// String-expression overload of Define: books a jitted define and returns an interface over the same node
// with an updated column register.
// NOTE(review): the pre-jitting checks and the declaration of newCols are not visible in this listing.
425 RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression)
426 {
427 constexpr auto where = "Define";
429 // these checks must be done before jitting lest we throw exceptions in jitted code
432
433 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
434 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
435 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
436
// Register the jitted define in newCols; fColRegister of this object is not modified here.
438 newCols.AddDefine(std::move(jittedDefine));
439
// The new interface wraps the same node (fProxiedPtr); only the column register differs.
440 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
441
442 return newInterface;
443 }
444
445 ////////////////////////////////////////////////////////////////////////////
446 /// \brief Overwrite the value and/or type of an existing column.
447 /// \param[in] name The name of the column to redefine.
448 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
449 /// \param[in] columns Names of the columns/branches in input to the expression.
450 /// \return the first node of the computation graph for which the quantity is redefined.
451 ///
452 /// The old value of the column can be used as an input for the expression.
453 ///
454 /// An exception is thrown in case the column to redefine does not already exist.
455 /// See Define() for more information.
// Callable overload of Redefine. The enable_if excludes any F convertible to std::string, leaving string
// expressions to the (name, expression) string overload.
456 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
457 RInterface<Proxied, DS_t> Redefine(std::string_view name, F expression, const ColumnNames_t &columns = {})
458 {
// Same DefineImpl instantiation as Define; only the trailing literal differs ("Redefine").
// NOTE(review): presumably DefineImpl uses that literal to tell define from redefine -- confirm.
459 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Redefine");
460 }
461
462 // clang-format off
463 ////////////////////////////////////////////////////////////////////////////
464 /// \brief Overwrite the value and/or type of an existing column.
465 /// \param[in] name The name of the column to redefine.
466 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
467 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
468 /// \return the first node of the computation graph for which the new quantity is defined.
469 ///
470 /// The old value of the column can be used as an input for the expression.
471 /// An exception is thrown in case the column to redefine does not already exist.
472 ///
473 /// See DefineSlot() for more information.
474 // clang-format on
// Forwards to DefineImpl with the Slot tag. There is no enable_if constraint on F here.
475 template <typename F>
476 RInterface<Proxied, DS_t> RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
477 {
// Same DefineImpl instantiation as DefineSlot; only the trailing literal differs ("RedefineSlot").
478 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "RedefineSlot");
479 }
480
481 // clang-format off
482 ////////////////////////////////////////////////////////////////////////////
483 /// \brief Overwrite the value and/or type of an existing column.
484 /// \param[in] name The name of the column to redefine.
485 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
486 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
487 /// \return the first node of the computation graph for which the new quantity is defined.
488 ///
489 /// The old value of the column can be used as an input for the expression.
490 /// An exception is thrown in case the column to redefine does not already exist.
491 ///
492 /// See DefineSlotEntry() for more information.
493 // clang-format on
// Forwards to DefineImpl with the SlotAndEntry tag. There is no enable_if constraint on F here.
494 template <typename F>
495 RInterface<Proxied, DS_t> RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
496 {
// Same DefineImpl instantiation as DefineSlotEntry; only the trailing literal differs ("RedefineSlotEntry").
497 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
498 "RedefineSlotEntry");
499 }
500
501 ////////////////////////////////////////////////////////////////////////////
502 /// \brief Overwrite the value and/or type of an existing column.
503 /// \param[in] name The name of the column to redefine.
504 /// \param[in] expression An expression in C++ which represents the defined value
505 /// \return the first node of the computation graph for which the new quantity is defined.
506 ///
507 /// The expression is just-in-time compiled and used to produce the column entries.
508 /// It must be valid C++ syntax in which variable names are substituted with the names
509 /// of branches/columns.
510 ///
511 /// The old value of the column can be used as an input for the expression.
512 /// An exception is thrown in case the column to redefine does not already exist.
513 ///
514 /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
// String-expression overload of Redefine: books a jitted define through the same BookDefineJit call used by
// the string overload of Define, and returns an interface over the same node with an updated column register.
// NOTE(review): the pre-jitting checks and the declaration of newCols are not visible in this listing.
515 RInterface<Proxied, DS_t> Redefine(std::string_view name, std::string_view expression)
516 {
517 constexpr auto where = "Redefine";
522
523 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
524 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
525 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
526
// Register the jitted define in newCols; fColRegister of this object is not modified here.
528 newCols.AddDefine(std::move(jittedDefine));
529
// The new interface wraps the same node (fProxiedPtr); only the column register differs.
530 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
531
532 return newInterface;
533 }
534
535 // clang-format off
536 ////////////////////////////////////////////////////////////////////////////
537 /// \brief Define a new column that is updated when the input sample changes.
538 /// \param[in] name The name of the defined column.
539 /// \param[in] expression A C++ callable that computes the new value of the defined column.
540 /// \return the first node of the computation graph for which the new quantity is defined.
541 ///
542 /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
543 /// where:
544 /// - `T` is the type of the defined column
545 /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
546 /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
547 /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
548 /// being processed (see the class docs for more information).
549 ///
550 /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
551 /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
552 /// starts rather than at every entry.
553 ///
554 /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often.
555 ///
556 /// ### Example usage:
557 /// ~~~{.cpp}
558 /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
559 /// df.DefinePerSample("weightbysample",
560 /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
561 /// { return id.Contains("sample1") ? 1.0f : 2.0f; });
562 /// ~~~
563 // clang-format on
564 // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
// Callable overload of DefinePerSample. RetType_t defaults to the return type of the callable F.
// NOTE(review): some statements of this body (after the name check, and the declaration of newCols) are not
// visible in this listing.
565 template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
566 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, F expression)
567 {
568 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
571
572 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
573 if (retTypeName.empty()) {
574 // TypeID2TypeName returned an empty name for RetType_t: the type is not known to the interpreter.
575 // Do not error out here; record a placeholder built from the demangled type name instead.
// NOTE(review): presumably an error is raised only if/when this column is used in jitted code -- confirm.
576 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
577 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
578 }
579
// The per-sample define node takes ownership of the callable (moved).
580 auto newColumn =
581 std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager);
582
584 newCols.AddDefine(std::move(newColumn));
// NOTE(review): this is declared as RInterface<Proxied>, i.e. with the default DataSource = void, while the
// return type is RInterface<Proxied, DS_t>; the two are the same type only when DS_t is void. The string
// overloads spell RInterface<Proxied, DS_t> here -- consider aligning.
585 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
586 return newInterface;
587 }
588
589 // clang-format off
590 ////////////////////////////////////////////////////////////////////////////
591 /// \brief Define a new column that is updated when the input sample changes.
592 /// \param[in] name The name of the defined column.
593 /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
594 /// \return the first node of the computation graph for which the new quantity is defined.
595 ///
596 /// The expression is just-in-time compiled and used to produce the column entries.
597 /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is
598 /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the
599 /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
600 ///
601 /// ### Example usage:
602 /// ~~~{.py}
603 /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root'])
604 /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f')
605 /// ~~~
606 ///
607 /// \note
608 /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
609 /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
610 /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
611 /// ~~~{.py}
612 /// ROOT.gInterpreter.Declare(
613 /// """
614 /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
615 /// return id.Contains("sample1") ? 1.0f : 2.0f;
616 /// }
617 /// """)
618 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
619 /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
620 /// ~~~
621 ///
622 /// \note
623 /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain
624 /// column names other than those mentioned above: the expression is evaluated once before the processing of the
625 /// sample even starts, so column values are not accessible.
626 // clang-format on
// String-expression overload of DefinePerSample: books a jitted per-sample define and returns an interface
// over the same node with an updated column register.
// NOTE(review): the pre-jitting checks and the declaration of newCols are not visible in this listing.
627 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, std::string_view expression)
628 {
629 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
630 // these checks must be done before jitting lest we throw exceptions in jitted code
633
634 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
// Unlike the BookDefineJit calls in Define/Redefine, no data source or branch-name list is passed here.
635 auto jittedDefine =
636 RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, upcastNodeOnHeap);
637
639 newCols.AddDefine(std::move(jittedDefine));
640
// The new interface wraps the same node (fProxiedPtr); only the column register differs.
641 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
642
643 return newInterface;
644 }
645
646 /// \brief Register systematic variations for an existing column.
647 /// \param[in] colName name of the column for which varied values are provided.
648 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
649 /// take any column values as input, similarly to what happens with Filter and Define calls. It must
650 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
651 /// \param[in] inputColumns the names of the columns to be passed to the callable.
652 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
653 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
654 ///
655 /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to
656 /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for
657 /// results that depend on any varied quantity a map/dictionary of varied results can be produced with
658 /// ROOT::RDF::Experimental::VariationsFor (see the example below).
659 ///
660 /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and
661 /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or
662 /// indirect dependencies of the column values on some registered variations). The keys will be a composition of
663 /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below.
664 ///
665 /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt.
666 /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):
667 /// ~~~{.cpp}
668 /// auto nominal_hx =
669 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"down", "up"})
670 /// .Filter("pt > k")
671 /// .Define("x", someFunc, {"pt"})
672 /// .Histo1D("x");
673 ///
674 /// auto hx = ROOT::RDF::VariationsFor(nominal_hx);
675 /// hx["nominal"].Draw();
676 /// hx["pt:down"].Draw("SAME");
677 /// ~~~
678 /// RDataFrame computes all variations as part of a single loop over the data.
679 /// In particular, this means that I/O and computation of values shared
680 /// among variations only happen once for all variations. Thus, the event loop
681 /// run-time typically scales much better than linearly with the number of
682 /// variations.
683 ///
684 /// RDataFrame lazily computes the varied values required to produce the
685 /// outputs of VariationsFor(). If VariationsFor() was not called for a result,
686 /// the computations run are only for the nominal case.
687 template <typename F>
688 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
689 const std::vector<std::string> &variationTags, std::string_view variationName = "")
690 {
691 std::vector<std::string> colNames{{std::string(colName)}};
692 const std::string theVariationName{variationName.empty() ? colName : variationName};
693
694 return VaryImpl<true>(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags,
695 theVariationName);
696 }
697
698 /// \brief Register systematic variations for an existing column using auto-generated variation tags.
699 /// This overload of Vary takes a nVariations parameter instead of a list of tag names. Tag names
700 /// will be auto-generated as the sequence 0...nVariations-1.
701 /// See the documentation of the previous overload for more information.
702 template <typename F>
703 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
704 std::size_t nVariations, std::string_view variationName = "")
705 {
706 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
707
708 std::vector<std::string> variationTags;
709 variationTags.reserve(nVariations);
710 for (std::size_t i = 0u; i < nVariations; ++i)
711 variationTags.emplace_back(std::to_string(i));
712
713 const std::string theVariationName{variationName.empty() ? colName : variationName};
714
715 return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName);
716 }
717
718 /// \brief Register a systematic variation that affects multiple columns simultaneously.
719 /// This overload of Vary takes a list of column names as first argument rather than a single name and
720 /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each
721 /// affected column.
722 /// See the documentation of the first Vary overload for more information.
723 ///
724 /// Example usage:
725 /// ~~~{.cpp}
726 /// // produce variations "ptAndEta:down" and "ptAndEta:up"
727 /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
728 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
729 /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied
730 /// {"down", "up"}, // variation tags
731 /// "ptAndEta"); // variation name
732 /// ~~~
733 template <typename F>
735 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
736 const std::vector<std::string> &variationTags, std::string_view variationName)
737 {
738 return VaryImpl<false>(colNames, std::forward<F>(expression), inputColumns, variationTags, variationName);
739 }
740
741 /// Overload to avoid ambiguity between C++20 string, vector<string> construction from init list.
742 template <typename F>
744 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
745 const std::vector<std::string> &variationTags, std::string_view variationName)
746 {
747 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, variationTags, variationName);
748 }
749
750 /// \brief Register systematic variations for one or more existing columns using auto-generated tags.
751 /// This overload of Vary takes a nVariations parameter instead of a list of tag names. Tag names
752 /// will be auto-generated as the sequence 0...nVariations-1.
753 /// See the documentation of the previous overload for more information.
754 template <typename F>
756 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
757 std::size_t nVariations, std::string_view variationName)
758 {
759 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
760
761 std::vector<std::string> variationTags;
762 variationTags.reserve(nVariations);
763 for (std::size_t i = 0u; i < nVariations; ++i)
764 variationTags.emplace_back(std::to_string(i));
765
766 return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName);
767 }
768
769 /// Overload to avoid ambiguity between C++20 string, vector<string> construction from init list.
770 template <typename F>
772 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
773 std::size_t nVariations, std::string_view variationName)
774 {
775 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, nVariations, variationName);
776 }
777
778 /// \brief Register systematic variations for an existing column.
779 /// \param[in] colName name of the column for which varied values are provided.
780 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
781 /// values for the specified column.
782 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
783 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
784 /// colName is used if none is provided.
785 ///
786 /// ~~~{.cpp}
787 /// auto nominal_hx =
788 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
789 /// .Filter("pt > k")
790 /// .Define("x", someFunc, {"pt"})
791 /// .Histo1D("x");
792 ///
793 /// auto hx = ROOT::RDF::VariationsFor(nominal_hx);
794 /// hx["nominal"].Draw();
795 /// hx["pt:down"].Draw("SAME");
796 /// ~~~
797 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression,
798 const std::vector<std::string> &variationTags, std::string_view variationName = "")
799 {
800 std::vector<std::string> colNames{{std::string(colName)}};
801 const std::string theVariationName{variationName.empty() ? colName : variationName};
802
803 return JittedVaryImpl(colNames, expression, variationTags, theVariationName, /*isSingleColumn=*/true);
804 }
805
806 /// \brief Register systematic variations for an existing column.
807 /// \param[in] colName name of the column for which varied values are provided.
808 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
809 /// values for the specified column.
810 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be "0", "1", etc.
811 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
812 /// colName is used if none is provided.
813 ///
814 /// See the documentation for the previous overload for more information.
815 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
816 std::string_view variationName = "")
817 {
818 std::vector<std::string> variationTags;
819 variationTags.reserve(nVariations);
820 for (std::size_t i = 0u; i < nVariations; ++i)
821 variationTags.emplace_back(std::to_string(i));
822
823 return Vary(colName, expression, std::move(variationTags), variationName);
824 }
825
826 /// \brief Register systematic variations for one or more existing columns.
827 /// \param[in] colNames names of the columns for which varied values are provided.
828 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
829 /// values for the specified columns.
830 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be "0", "1", etc.
831 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
832 ///
833 /// ~~~{.cpp}
834 /// auto nominal_hx =
835 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
836 /// .Histo1D("x", "y");
837 ///
838 /// auto hx = ROOT::RDF::VariationsFor(nominal_hx);
839 /// hx["nominal"].Draw();
840 /// hx["xy:0"].Draw("SAME");
841 /// hx["xy:1"].Draw("SAME");
842 /// ~~~
843 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
844 std::size_t nVariations, std::string_view variationName)
845 {
846 std::vector<std::string> variationTags;
847 variationTags.reserve(nVariations);
848 for (std::size_t i = 0u; i < nVariations; ++i)
849 variationTags.emplace_back(std::to_string(i));
850
851 return Vary(colNames, expression, std::move(variationTags), variationName);
852 }
853
854 /// Overload to avoid ambiguity between C++20 string, vector<string> construction from init list.
855 RInterface<Proxied, DS_t> Vary(std::initializer_list<std::string> colNames, std::string_view expression,
856 std::size_t nVariations, std::string_view variationName)
857 {
858 return Vary(std::vector<std::string>(colNames), expression, nVariations, variationName);
859 }
860
861 /// \brief Register systematic variations for one or more existing columns.
862 /// \param[in] colNames names of the columns for which varied values are provided.
863 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
864 /// values for the specified columns.
865 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
866 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
867 ///
868 /// ~~~{.cpp}
869 /// auto nominal_hx =
870 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
871 /// .Histo1D("x", "y");
872 ///
873 /// auto hx = ROOT::RDF::VariationsFor(nominal_hx);
874 /// hx["nominal"].Draw();
875 /// hx["xy:down"].Draw("SAME");
876 /// hx["xy:up"].Draw("SAME");
877 /// ~~~
878 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
879 const std::vector<std::string> &variationTags, std::string_view variationName)
880 {
881 return JittedVaryImpl(colNames, expression, variationTags, variationName, /*isSingleColumn=*/false);
882 }
883
884 ////////////////////////////////////////////////////////////////////////////
885 /// \brief Allow to refer to a column with a different name.
886 /// \param[in] alias name of the column alias
887 /// \param[in] columnName of the column to be aliased
888 /// \return the first node of the computation graph for which the alias is available.
889 ///
890 /// Aliasing an alias is supported.
891 ///
892 /// ### Example usage:
893 /// ~~~{.cpp}
894 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
895 /// ~~~
896 RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName)
897 {
898 // The symmetry with Define is clear. We want to:
899 // - Create globally the alias and return this very node, unchanged
900 // - Make aliases accessible based on chains and not globally
901
902 // Column names provided by the data source, or an empty list when there is no data source
903 auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
904
// Label naming this method; in the visible lines it is not used.
// NOTE(review): presumably it is consumed by the checks on the missing listing lines 906/908.
905 constexpr auto where = "Alias";
// NOTE(review): the embedded listing numbers skip 906 and 908, so the validation of the alias name
// announced by the comment below is not visible in this listing -- confirm against the upstream header.
907 // If the alias name is a column name, there is a problem
909
// Validate the aliased column name; the single validated name is what the alias will point to.
910 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
911
// NOTE(review): listing line 912 is missing; `newCols` is used below without a visible declaration.
// Presumably it is a column register initialised from fColRegister -- confirm against upstream.
913 newCols.AddAlias(alias, validColumnName);
914
// The new interface wraps the same proxied node (fProxiedPtr); only the column register differs.
915 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
916
917 return newInterface;
918 }
919
920 ////////////////////////////////////////////////////////////////////////////
921 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
922 /// \tparam ColumnTypes variadic list of branch/column types.
923 /// \param[in] treename The name of the output TTree.
924 /// \param[in] filename The name of the output TFile.
925 /// \param[in] columnList The list of names of the columns/branches to be written.
926 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
927 /// \return a `RDataFrame` that wraps the snapshotted dataset.
928 ///
929 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
930 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
931 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
932 /// written out and it appears before the array in the columnList.
933 ///
934 /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other
935 /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are
936 /// present, by default all friend top-level branches that have names that do not collide with
937 /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out
938 /// friend branches with the same names of branches in the main TTree/TChain with names of the form
939 /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain.
940 ///
941 /// ### Writing to a sub-directory
942 ///
943 /// Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to
944 /// the TTree as part of the TTree name, e.g. `df.Snapshot("subdir/t", "f.root")` writes TTree `t` in the
945 /// sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
946 ///
947 /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
948 /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled with
949 /// respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in wrong
950 /// associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
951 /// error out if such a "shuffled" TTree is used in a friendship.
952 ///
953 /// \note In case no events are written out (e.g. because no event passes all filters) the behavior of Snapshot in
954 /// single-thread and multi-thread runs is different: in single-thread runs, Snapshot will write out a TTree with
955 /// the specified name and zero entries; in multi-thread runs, no TTree object will be written out to disk.
956 ///
957 /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
958 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
959 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
960 /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
961 ///
962 /// ### Example invocations:
963 ///
964 /// ~~~{.cpp}
965 /// // without specifying template parameters (column types automatically deduced)
966 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
967 ///
968 /// // specifying template parameters ("x" is `int`, "y" is `float`)
969 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
970 /// ~~~
971 ///
972 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
973 /// `RSnapshotOptions`:
974 /// ~~~{.cpp}
975 /// RSnapshotOptions opts;
976 /// opts.fLazy = true;
977 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
978 /// ~~~
979 template <typename... ColumnTypes>
981 Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
982 const RSnapshotOptions &options = RSnapshotOptions())
983 {
984 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
985 }
986
987 ////////////////////////////////////////////////////////////////////////////
988 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
989 /// \param[in] treename The name of the output TTree.
990 /// \param[in] filename The name of the output TFile.
991 /// \param[in] columnList The list of names of the columns/branches to be written.
992 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
993 /// \return a `RDataFrame` that wraps the snapshotted dataset.
994 ///
995 /// This function returns a `RDataFrame` built with the output tree as a source.
996 /// The types of the columns are automatically inferred and do not need to be specified.
997 ///
998 /// See above for a more complete description and example usages.
999 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1000 const ColumnNames_t &columnList,
1001 const RSnapshotOptions &options = RSnapshotOptions())
1002 {
// Untyped Snapshot: the column lists are prepared, a Snapshot action with inferred column types
// is booked, and the result is dereferenced right away unless options.fLazy is set.
1003 // columnList as processed by FilterArraySizeColNames (handling of `#var` size columns)
1004 auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
1005 // like colListNoPoundSizes but with aliases resolved
1006 auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes);
// NOTE(review): the embedded listing numbers skip 1007 and 1010; the call that produces
// pairOfColumnLists is therefore only partially visible -- confirm against the upstream header.
1008 // like colListNoAliases but with missing size branches required by array branches added in the right positions
1009 const auto pairOfColumnLists =
1011 std::move(colListNoAliases), std::move(colListNoPoundSizes));
// first: names without aliases, passed to the new RDataFrame and to the action;
// second: names with aliases, passed to the snapshot helper arguments.
1012 const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first;
1013 const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second;
1014
1015
// Keep the full path for the new RDataFrame, then split it: `treename` is overwritten with the
// bare tree name and `dirname` refers to the directory part.
1016 const auto fullTreeName = treename;
1017 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
1018 treename = parsedTreePath.fTreeName;
1019 const auto &dirname = parsedTreePath.fDirName;
1020
1021 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(
1022 RDFInternal::SnapshotHelperArgs{std::string(filename), std::string(dirname), std::string(treename),
1023 colListWithAliasesAndSizeBranches, options});
1024
// NOTE(review): listing line 1025 is missing here -- confirm against the upstream header.
// The RDataFrame handed back to the caller is built on the output file and the full tree path.
1026 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, colListNoAliasesWithSizeBranches);
1027
1028 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
1029 colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr,
1030 colListNoAliasesWithSizeBranches.size());
1031
// Unless a lazy snapshot was requested, dereference the result now
// (presumably this is what triggers the event loop -- confirm).
1032 if (!options.fLazy)
1033 *resPtr;
1034 return resPtr;
1035 }
1036
1037 // clang-format off
1038 ////////////////////////////////////////////////////////////////////////////
1039 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1040 /// \param[in] treename The name of the output TTree.
1041 /// \param[in] filename The name of the output TFile.
1042 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1043 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
1044 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1045 ///
1046 /// This function returns a `RDataFrame` built with the output tree as a source.
1047 /// The types of the columns are automatically inferred and do not need to be specified.
1048 ///
1049 /// See above for a more complete description and example usages.
1050 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1051 std::string_view columnNameRegexp = "",
1052 const RSnapshotOptions &options = RSnapshotOptions())
1053 {
1054 const auto definedColumns = fColRegister.GetNames();
1055 auto *tree = fLoopManager->GetTree();
1056 const auto treeBranchNames = tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1057 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1058 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1059 ColumnNames_t dsColumnsWithoutSizeColumns;
1060 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1061 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1062 ColumnNames_t columnNames;
1063 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
1064 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1065 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1066 columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
1067
1068 // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd.
1069 // RemoveDuplicates should preserve ordering of the columns: it might be meaningful.
1070 RDFInternal::RemoveDuplicates(columnNames);
1071
1072 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
1073 return Snapshot(treename, filename, selectedColumns, options);
1074 }
1075 // clang-format on
1076
1077 // clang-format off
1078 ////////////////////////////////////////////////////////////////////////////
1079 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1080 /// \param[in] treename The name of the output TTree.
1081 /// \param[in] filename The name of the output TFile.
1082 /// \param[in] columnList The list of names of the columns/branches to be written.
1083 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1084 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1085 ///
1086 /// This function returns a `RDataFrame` built with the output tree as a source.
1087 /// The types of the columns are automatically inferred and do not need to be specified.
1088 ///
1089 /// See above for a more complete description and example usages.
1090 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1091 std::initializer_list<std::string> columnList,
1092 const RSnapshotOptions &options = RSnapshotOptions())
1093 {
1094 ColumnNames_t selectedColumns(columnList);
1095 return Snapshot(treename, filename, selectedColumns, options);
1096 }
1097 // clang-format on
1098
1099 ////////////////////////////////////////////////////////////////////////////
1100 /// \brief Save selected columns in memory.
1101 /// \tparam ColumnTypes variadic list of branch/column types.
1102 /// \param[in] columnList columns to be cached in memory.
1103 /// \return a `RDataFrame` that wraps the cached dataset.
1104 ///
1105 /// This action returns a new `RDataFrame` object, completely detached from
1106 /// the originating `RDataFrame`. The new dataframe only contains the cached
1107 /// columns and stores their content in memory for fast, zero-copy subsequent access.
1108 ///
1109 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
1110 /// fits in memory and that will be accessed many times.
1111 ///
1112 /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
1113 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1114 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1115 /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
1116 ///
1117 /// ### Example usage:
1118 ///
1119 /// **Types and columns specified:**
1120 /// ~~~{.cpp}
1121 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
1122 /// ~~~
1123 ///
1124 /// **Types inferred and columns specified (this invocation relies on jitting):**
1125 /// ~~~{.cpp}
1126 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
1127 /// ~~~
1128 ///
1129 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
1130 /// ~~~{.cpp}
1131 /// auto cache_all_cols_df = df.Cache(myRegexp);
1132 /// ~~~
1133 template <typename... ColumnTypes>
1135 {
1136 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
1137 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
1138 }
1139
1140 ////////////////////////////////////////////////////////////////////////////
1141 /// \brief Save selected columns in memory.
1142 /// \param[in] columnList columns to be cached in memory
1143 /// \return a `RDataFrame` that wraps the cached dataset.
1144 ///
1145 /// See the previous overloads for more information.
// NOTE(review): the signature line (listing line 1146) is missing from this listing. The body reads a
// `columnList` parameter and returns an RInterface<RLoopManager> -- confirm against the upstream header.
1147 {
1148 // Early return: if the list of columns is empty, just return an empty RDF
1149 // If we proceed, the jitted call will not compile!
1150 if (columnList.empty()) {
// The empty RDF is built on a loop manager constructed from the current entry count.
1151 auto nEntries = *this->Count();
1152 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
1153 return emptyRDF;
1154 }
1155
// The code to jit is assembled as text in cacheCall; objects living in this scope are referenced
// from that text through their printed addresses.
1156 std::stringstream cacheCall;
1157 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
1158 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
1159 fColRegister);
1160 // build a string equivalent to
1161 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
// resRDF is the placeholder that is returned at the end of this function.
1162 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
// NOTE(review): listing line 1164 is missing between the next two lines; presumably it streams the
// address of resRDF, the assignment target of the jitted statement -- confirm against upstream.
1163 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
1165 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1166 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
1167
1168 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
1169
// Validate the column names and obtain their types: the type names become the template
// arguments of the jitted Cache<...> call.
1170 const auto validColumnNames =
1171 GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
1172 const auto colTypes = GetValidatedArgTypes(validColumnNames, fColRegister, fLoopManager->GetTree(), fDataSource,
1173 "Cache", /*vector2rvec=*/false);
1174 for (const auto &colType : colTypes)
1175 cacheCall << colType << ", ";
1176 if (!columnListWithoutSizeColumns.empty())
1177 cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
// The column list is passed to the jitted call by address as well.
1178 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
1179 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
1180
1181 // book the code to jit with the RLoopManager and trigger the event loop
1182 fLoopManager->ToJitExec(cacheCall.str());
1183 fLoopManager->Jit();
1184
1185 return resRDF;
1186 }
1187
1188 ////////////////////////////////////////////////////////////////////////////
1189 /// \brief Save selected columns in memory.
1190 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1191 /// \return a `RDataFrame` that wraps the cached dataset.
1192 ///
1193 /// The existing columns are matched against the regular expression. If the string provided
1194 /// is empty, all columns are selected. See the previous overloads for more information.
1195 RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "")
1196 {
1197 const auto definedColumns = fColRegister.GetNames();
1198 auto *tree = fLoopManager->GetTree();
1199 const auto treeBranchNames =
1201 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1202 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1203 ColumnNames_t dsColumnsWithoutSizeColumns;
1204 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1205 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1206 ColumnNames_t columnNames;
1207 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
1208 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1209 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1210 columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
1211 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
1212 return Cache(selectedColumns);
1213 }
1214
1215 ////////////////////////////////////////////////////////////////////////////
1216 /// \brief Save selected columns in memory.
1217 /// \param[in] columnList columns to be cached in memory.
1218 /// \return a `RDataFrame` that wraps the cached dataset.
1219 ///
1220 /// See the previous overloads for more information.
1221 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
1222 {
1223 ColumnNames_t selectedColumns(columnList);
1224 return Cache(selectedColumns);
1225 }
1226
1227 // clang-format off
1228 ////////////////////////////////////////////////////////////////////////////
1229 /// \brief Creates a node that filters entries based on range: [begin, end).
1230 /// \param[in] begin Initial entry number considered for this range.
1231 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1232 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
1233 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
1234 ///
1235 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
1236 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
1237 ///
1238 /// ### Example usage:
1239 /// ~~~{.cpp}
1240 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
1241 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
1242 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
1243 /// ~~~
1244 // clang-format on
1245 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
1246 {
1247 // check invariants
1248 if (stride == 0 || (end != 0 && end < begin))
1249 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
1250 CheckIMTDisabled("Range");
1251
1252 using Range_t = RDFDetail::RRange<Proxied>;
1253 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
1254 RInterface<RDFDetail::RRange<Proxied>, DS_t> newInterface(std::move(rangePtr), *fLoopManager, fColRegister);
1255 return newInterface;
1256 }
1257
1258 // clang-format off
1259 ////////////////////////////////////////////////////////////////////////////
1260 /// \brief Creates a node that filters entries based on range.
1261 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1262 /// \return a node of the computation graph for which the range is defined.
1263 ///
1264 /// See the other Range overload for a detailed description.
1265 // clang-format on
1266 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
1267
1268 // clang-format off
1269 ////////////////////////////////////////////////////////////////////////////
1270 /// \brief Execute a user-defined function on each entry (*instant action*).
1271 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1272 /// \param[in] columns Names of the columns/branches in input to the user function.
1273 ///
1274 /// The callable `f` is invoked once per entry. This is an *instant action*:
1275 /// upon invocation, an event loop as well as execution of all scheduled actions
1276 /// is triggered.
1277 /// Users are responsible for the thread-safety of this callable when executing
1278 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
1279 ///
1280 /// ### Example usage:
1281 /// ~~~{.cpp}
1282 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
1283 /// ~~~
1284 // clang-format on
1285 template <typename F>
1286 void Foreach(F f, const ColumnNames_t &columns = {})
1287 {
1288 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
1289 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
1290 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
1291 }
1292
1293 // clang-format off
1294 ////////////////////////////////////////////////////////////////////////////
1295 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
1296 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1297 /// \param[in] columns Names of the columns/branches in input to the user function.
1298 ///
1299 /// Same as `Foreach`, but the user-defined function takes an extra
1300 /// `unsigned int` as its first parameter, the *processing slot index*.
1301 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
1302 /// for each thread of execution.
1303 /// This is meant as a helper in writing thread-safe `Foreach`
1304 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1305 /// The user-defined processing callable is able to follow different
1306 /// *streams of processing* indexed by the first parameter.
1307 /// `ForeachSlot` works just as well with single-thread execution: in that
1308 /// case `slot` will always be `0`.
1309 ///
1310 /// ### Example usage:
1311 /// ~~~{.cpp}
1312 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1313 /// ~~~
1314 // clang-format on
1315 template <typename F>
1316 void ForeachSlot(F f, const ColumnNames_t &columns = {})
1317 {
1319 constexpr auto nColumns = ColTypes_t::list_size;
1320
1321 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1322 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
1323
1324 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1326
1327 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister);
1328
1329 fLoopManager->Run();
1330 }
1331
1332 // clang-format off
1333 ////////////////////////////////////////////////////////////////////////////
1334 /// \brief Execute a user-defined reduce operation on the values of a column.
1335 /// \tparam F The type of the reduce callable. Automatically deduced.
1336 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1337 /// \param[in] f A callable with signature `T(T,T)`
1338 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1339 /// \return the reduced quantity wrapped in a ROOT::RDF::RResultPtr.
1340 ///
1341 /// A reduction takes two values of a column and merges them into one (e.g.
1342 /// by summing them, taking the maximum, etc). This action performs the
1343 /// specified reduction operation on all processed column values, returning
1344 /// a single value of the same type. The callable f must satisfy the general
1345 /// requirements of a *processing function* besides having signature `T(T,T)`
1346 /// where `T` is the type of column columnName.
1347 ///
1348 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1349 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1350 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1351 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1352 /// overload.
1353 ///
1354 /// ### Example usage:
1355 /// ~~~{.cpp}
1356 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1357 /// ~~~
1358 ///
1359 /// This action is *lazy*: upon invocation of this method the calculation is
1360 /// booked but not executed. Also see RResultPtr.
1361 // clang-format on
1362 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1363 RResultPtr<T> Reduce(F f, std::string_view columnName = "")
1364 {
1365 static_assert(
1366 std::is_default_constructible<T>::value,
1367 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
1368 return Reduce(std::move(f), columnName, T());
1369 }
1370
1371 ////////////////////////////////////////////////////////////////////////////
1372 /// \brief Execute a user-defined reduce operation on the values of a column.
1373 /// \tparam F The type of the reduce callable. Automatically deduced.
1374 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1375 /// \param[in] f A callable with signature `T(T,T)`
1376 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1377 /// \param[in] redIdentity The reduced object of each thread is initialized to this value.
1378 /// \return the reduced quantity wrapped in a RResultPtr.
1379 ///
1380 /// ### Example usage:
1381 /// ~~~{.cpp}
1382 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1383 /// ~~~
1384 /// See the description of the first Reduce overload for more information.
1385 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1386 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1387 {
1388 return Aggregate(f, f, columnName, redIdentity);
1389 }
1390
1391 ////////////////////////////////////////////////////////////////////////////
1392 /// \brief Return the number of entries processed (*lazy action*).
1393 /// \return the number of entries wrapped in a RResultPtr.
1394 ///
1395 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1396 /// This action is *lazy*: upon invocation of this method the calculation is
1397 /// booked but not executed. Also see RResultPtr.
1398 ///
1399 /// ### Example usage:
1400 /// ~~~{.cpp}
1401 /// auto nEntriesAfterCuts = myFilteredDf.Count();
1402 /// ~~~
1403 ///
1405 {
1406 const auto nSlots = fLoopManager->GetNSlots();
1407 auto cSPtr = std::make_shared<ULong64_t>(0);
1408 using Helper_t = RDFInternal::CountHelper;
1410 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
1412 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1413 }
1414
1415 ////////////////////////////////////////////////////////////////////////////
1416 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1417 /// \tparam T The type of the column.
1418 /// \tparam COLL The type of collection used to store the values.
1419 /// \param[in] column The name of the column to collect the values of.
1420 /// \return the content of the selected column wrapped in a RResultPtr.
1421 ///
1422 /// The collection type to be specified for C-style array columns is `RVec<T>`:
1423 /// in this case the returned collection is a `std::vector<RVec<T>>`.
1424 /// ### Example usage:
1425 /// ~~~{.cpp}
1426 /// // In this case intCol is a std::vector<int>
1427 /// auto intCol = rdf.Take<int>("integerColumn");
1428 /// // Same content as above but in this case taken as a RVec<int>
1429 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1430 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1431 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1432 /// ~~~
1433 /// This action is *lazy*: upon invocation of this method the calculation is
1434 /// booked but not executed. Also see RResultPtr.
1435 template <typename T, typename COLL = std::vector<T>>
1436 RResultPtr<COLL> Take(std::string_view column = "")
1437 {
1438 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1439
1440 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1441 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1442
1443 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
1445 auto valuesPtr = std::make_shared<COLL>();
1446 const auto nSlots = fLoopManager->GetNSlots();
1447
1448 auto action =
1449 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister);
1450 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1451 }
1452
1453 ////////////////////////////////////////////////////////////////////////////
1454 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1455 /// \tparam V The type of the column used to fill the histogram.
1456 /// \param[in] model The returned histogram will be constructed using this as a model.
1457 /// \param[in] vName The name of the column that will fill the histogram.
1458 /// \return the monodimensional histogram wrapped in a RResultPtr.
1459 ///
1460 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1461 /// is filled with each one of the elements of the container. In case multiple columns of container type
1462 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1463 /// possibly different lengths between events).
1464 /// This action is *lazy*: upon invocation of this method the calculation is
1465 /// booked but not executed. Also see RResultPtr.
1466 ///
1467 /// ### Example usage:
1468 /// ~~~{.cpp}
1469 /// // Deduce column type (this invocation needs jitting internally)
1470 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1471 /// // Explicit column type
1472 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1473 /// ~~~
1474 ///
1475 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1476 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1477 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1478 template <typename V = RDFDetail::RInferredType>
1479 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1480 {
1481 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1482
1483 const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1484
1485 std::shared_ptr<::TH1D> h(nullptr);
1486 {
1487 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1488 h = model.GetHistogram();
1489 h->SetDirectory(nullptr);
1490 }
1491
1492 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1493 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
1494 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h, fProxiedPtr);
1495 }
1496
1497 ////////////////////////////////////////////////////////////////////////////
1498 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1499 /// \tparam V The type of the column used to fill the histogram.
1500 /// \param[in] vName The name of the column that will fill the histogram.
1501 /// \return the monodimensional histogram wrapped in a RResultPtr.
1502 ///
1503 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1504 /// The "name" and "title" strings are built starting from the input column name.
1505 /// See the description of the first Histo1D() overload for more details.
1506 ///
1507 /// ### Example usage:
1508 /// ~~~{.cpp}
1509 /// // Deduce column type (this invocation needs jitting internally)
1510 /// auto myHist1 = myDf.Histo1D("myColumn");
1511 /// // Explicit column type
1512 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1513 /// ~~~
1514 template <typename V = RDFDetail::RInferredType>
1515 RResultPtr<::TH1D> Histo1D(std::string_view vName)
1516 {
1517 const auto h_name = std::string(vName);
1518 const auto h_title = h_name + ";" + h_name + ";count";
1519 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1520 }
1521
1522 ////////////////////////////////////////////////////////////////////////////
1523 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1524 /// \tparam V The type of the column used to fill the histogram.
1525 /// \tparam W The type of the column used as weights.
1526 /// \param[in] model The returned histogram will be constructed using this as a model.
1527 /// \param[in] vName The name of the column that will fill the histogram.
1528 /// \param[in] wName The name of the column that will provide the weights.
1529 /// \return the monodimensional histogram wrapped in a RResultPtr.
1530 ///
1531 /// See the description of the first Histo1D() overload for more details.
1532 ///
1533 /// ### Example usage:
1534 /// ~~~{.cpp}
1535 /// // Deduce column type (this invocation needs jitting internally)
1536 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1537 /// // Explicit column type
1538 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1539 /// ~~~
1540 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1541 RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
1542 {
1543 const std::vector<std::string_view> columnViews = {vName, wName};
1544 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1545 ? ColumnNames_t()
1546 : ColumnNames_t(columnViews.begin(), columnViews.end());
1547 std::shared_ptr<::TH1D> h(nullptr);
1548 {
1549 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1550 h = model.GetHistogram();
1551 }
1552 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h, fProxiedPtr);
1553 }
1554
1555 ////////////////////////////////////////////////////////////////////////////
1556 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1557 /// \tparam V The type of the column used to fill the histogram.
1558 /// \tparam W The type of the column used as weights.
1559 /// \param[in] vName The name of the column that will fill the histogram.
1560 /// \param[in] wName The name of the column that will provide the weights.
1561 /// \return the monodimensional histogram wrapped in a RResultPtr.
1562 ///
1563 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1564 /// The "name" and "title" strings are built starting from the input column names.
1565 /// See the description of the first Histo1D() overload for more details.
1566 ///
1567 /// ### Example usage:
1568 /// ~~~{.cpp}
1569 /// // Deduce column types (this invocation needs jitting internally)
1570 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1571 /// // Explicit column types
1572 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1573 /// ~~~
1574 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1575 RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName)
1576 {
1577 // We build name and title based on the value and weight column names
1578 std::string str_vName{vName};
1579 std::string str_wName{wName};
1580 const auto h_name = str_vName + "_weighted_" + str_wName;
1581 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1582 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1583 }
1584
1585 ////////////////////////////////////////////////////////////////////////////
1586 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1587 /// \tparam V The type of the column used to fill the histogram.
1588 /// \tparam W The type of the column used as weights.
1589 /// \param[in] model The returned histogram will be constructed using this as a model.
1590 /// \return the monodimensional histogram wrapped in a RResultPtr.
1591 ///
1592 /// This overload will use the first two default columns as column names.
1593 /// See the description of the first Histo1D() overload for more details.
1594 template <typename V, typename W>
1595 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1596 {
1597 return Histo1D<V, W>(model, "", "");
1598 }
1599
1600 ////////////////////////////////////////////////////////////////////////////
1601 /// \brief Fill and return a two-dimensional histogram (*lazy action*).
1602 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1603 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1604 /// \param[in] model The returned histogram will be constructed using this as a model.
1605 /// \param[in] v1Name The name of the column that will fill the x axis.
1606 /// \param[in] v2Name The name of the column that will fill the y axis.
1607 /// \return the bidimensional histogram wrapped in a RResultPtr.
1608 ///
1609 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1610 /// is filled with each one of the elements of the container. In case multiple columns of container type
1611 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1612 /// possibly different lengths between events).
1613 /// This action is *lazy*: upon invocation of this method the calculation is
1614 /// booked but not executed. Also see RResultPtr.
1615 ///
1616 /// ### Example usage:
1617 /// ~~~{.cpp}
1618 /// // Deduce column types (this invocation needs jitting internally)
1619 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1620 /// // Explicit column types
1621 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1622 /// ~~~
1623 ///
1624 ///
1625 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1626 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1627 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1628 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1629 RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1630 {
1631 std::shared_ptr<::TH2D> h(nullptr);
1632 {
1633 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1634 h = model.GetHistogram();
1635 }
1636 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1637 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1638 }
1639 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1640 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1641 ? ColumnNames_t()
1642 : ColumnNames_t(columnViews.begin(), columnViews.end());
1643 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h, fProxiedPtr);
1644 }
1645
1646 ////////////////////////////////////////////////////////////////////////////
1647 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
1648 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1649 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1650 /// \tparam W The type of the column used for the weights of the histogram.
1651 /// \param[in] model The returned histogram will be constructed using this as a model.
1652 /// \param[in] v1Name The name of the column that will fill the x axis.
1653 /// \param[in] v2Name The name of the column that will fill the y axis.
1654 /// \param[in] wName The name of the column that will provide the weights.
1655 /// \return the bidimensional histogram wrapped in a RResultPtr.
1656 ///
1657 /// This action is *lazy*: upon invocation of this method the calculation is
1658 /// booked but not executed. Also see RResultPtr.
1659 ///
1660 /// ### Example usage:
1661 /// ~~~{.cpp}
1662 /// // Deduce column types (this invocation needs jitting internally)
1663 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1664 /// // Explicit column types
1665 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1666 /// ~~~
1667 ///
1668 /// See the documentation of the first Histo2D() overload for more details.
1669 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1670 typename W = RDFDetail::RInferredType>
1672 Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
1673 {
1674 std::shared_ptr<::TH2D> h(nullptr);
1675 {
1676 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1677 h = model.GetHistogram();
1678 }
1679 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1680 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1681 }
1682 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1683 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1684 ? ColumnNames_t()
1685 : ColumnNames_t(columnViews.begin(), columnViews.end());
1686 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
1687 }
1688
1689 template <typename V1, typename V2, typename W>
1691 {
1692 return Histo2D<V1, V2, W>(model, "", "", "");
1693 }
1694
1695 ////////////////////////////////////////////////////////////////////////////
1696 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1697 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1698 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1699 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1700 /// \param[in] model The returned histogram will be constructed using this as a model.
1701 /// \param[in] v1Name The name of the column that will fill the x axis.
1702 /// \param[in] v2Name The name of the column that will fill the y axis.
1703 /// \param[in] v3Name The name of the column that will fill the z axis.
1704 /// \return the tridimensional histogram wrapped in a RResultPtr.
1705 ///
1706 /// This action is *lazy*: upon invocation of this method the calculation is
1707 /// booked but not executed. Also see RResultPtr.
1708 ///
1709 /// ### Example usage:
1710 /// ~~~{.cpp}
1711 /// // Deduce column types (this invocation needs jitting internally)
1712 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1713 /// "myValueX", "myValueY", "myValueZ");
1714 /// // Explicit column types
1715 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1716 /// "myValueX", "myValueY", "myValueZ");
1717 /// ~~~
1718 ///
1719 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1720 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1721 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1722 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1723 typename V3 = RDFDetail::RInferredType>
1724 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "",
1725 std::string_view v3Name = "")
1726 {
1727 std::shared_ptr<::TH3D> h(nullptr);
1728 {
1729 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1730 h = model.GetHistogram();
1731 }
1732 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1733 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1734 }
1735 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1736 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1737 ? ColumnNames_t()
1738 : ColumnNames_t(columnViews.begin(), columnViews.end());
1739 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
1740 }
1741
1742 ////////////////////////////////////////////////////////////////////////////
1743 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1744 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1745 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1746 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1747 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1748 /// \param[in] model The returned histogram will be constructed using this as a model.
1749 /// \param[in] v1Name The name of the column that will fill the x axis.
1750 /// \param[in] v2Name The name of the column that will fill the y axis.
1751 /// \param[in] v3Name The name of the column that will fill the z axis.
1752 /// \param[in] wName The name of the column that will provide the weights.
1753 /// \return the tridimensional histogram wrapped in a RResultPtr.
1754 ///
1755 /// This action is *lazy*: upon invocation of this method the calculation is
1756 /// booked but not executed. Also see RResultPtr.
1757 ///
1758 /// ### Example usage:
1759 /// ~~~{.cpp}
1760 /// // Deduce column types (this invocation needs jitting internally)
1761 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1762 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1763 /// // Explicit column types
1764 /// using d_t = double;
1765 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1766 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1767 /// ~~~
1768 ///
1769 ///
1770 /// See the documentation of the first Histo3D() overload for more details.
1771 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1772 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1773 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name,
1774 std::string_view v3Name, std::string_view wName)
1775 {
1776 std::shared_ptr<::TH3D> h(nullptr);
1777 {
1778 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1779 h = model.GetHistogram();
1780 }
1781 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1782 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1783 }
1784 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1785 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1786 ? ColumnNames_t()
1787 : ColumnNames_t(columnViews.begin(), columnViews.end());
1788 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
1789 }
1790
1791 template <typename V1, typename V2, typename V3, typename W>
1793 {
1794 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1795 }
1796
1797 ////////////////////////////////////////////////////////////////////////////
1798 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1799 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
1800 /// present.
1801 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
1802 /// object.
1803 /// \param[in] model The returned histogram will be constructed using this as a model.
1804 /// \param[in] columnList
1805 /// A list containing the names of the columns that will be passed when calling `Fill`.
1806 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
1807 /// \return the N-dimensional histogram wrapped in a RResultPtr.
1808 ///
1809 /// This action is *lazy*: upon invocation of this method the calculation is
1810 /// booked but not executed. See RResultPtr documentation.
1811 ///
1812 /// ### Example usage:
1813 /// ~~~{.cpp}
1814 /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
1815 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
1816 /// {"col0", "col1", "col2", "col3"});
1817 /// ~~~
1818 ///
1819 template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
1820 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
1821 {
1822 std::shared_ptr<::THnD> h(nullptr);
1823 {
1824 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1825 h = model.GetHistogram();
1826
1827 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
1828 h->Sumw2();
1829 } else if (int(columnList.size()) != h->GetNdimensions()) {
1830 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
1831 }
1832 }
1833 return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h,
1834 fProxiedPtr);
1835 }
1836
1837 ////////////////////////////////////////////////////////////////////////////
1838 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1839 /// \param[in] model The returned histogram will be constructed using this as a model.
1840 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1841 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
1842 /// \return the N-dimensional histogram wrapped in a RResultPtr.
1843 ///
1844 /// This action is *lazy*: upon invocation of this method the calculation is
1845 /// booked but not executed. Also see RResultPtr.
1846 ///
1847 /// ### Example usage:
1848 /// ~~~{.cpp}
1849 /// auto myFilledObj = myDf.HistoND({"name","title", 4,
1850 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
1851 /// {"col0", "col1", "col2", "col3"});
1852 /// ~~~
1853 ///
1854 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
1855 {
1856 std::shared_ptr<::THnD> h(nullptr);
1857 {
1858 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1859 h = model.GetHistogram();
1860
1861 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
1862 h->Sumw2();
1863 } else if (int(columnList.size()) != h->GetNdimensions()) {
1864 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
1865 }
1866 }
1867 return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h, fProxiedPtr,
1868 columnList.size());
1869 }
1870
1871 ////////////////////////////////////////////////////////////////////////////
1872 /// \brief Fill and return a TGraph object (*lazy action*).
1873 /// \tparam X The type of the column used to fill the x axis.
1874 /// \tparam Y The type of the column used to fill the y axis.
1875 /// \param[in] x The name of the column that will fill the x axis.
1876 /// \param[in] y The name of the column that will fill the y axis.
1877 /// \return the TGraph wrapped in a RResultPtr.
1878 ///
1879 /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph
1880 /// is filled with each one of the elements of the container.
1881 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1882 /// If the Graph has to be drawn, it is suggested to the user to sort it on the x before printing.
1883 /// A name and a title to the TGraph is given based on the input column names.
1884 ///
1885 /// This action is *lazy*: upon invocation of this method the calculation is
1886 /// booked but not executed. Also see RResultPtr.
1887 ///
1888 /// ### Example usage:
1889 /// ~~~{.cpp}
1890 /// // Deduce column types (this invocation needs jitting internally)
1891 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1892 /// // Explicit column types
1893 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1894 /// ~~~
1895 ///
1896 /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory
1897 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1898 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1899 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
1900 RResultPtr<::TGraph> Graph(std::string_view x = "", std::string_view y = "")
1901 {
1902 auto graph = std::make_shared<::TGraph>();
1903 const std::vector<std::string_view> columnViews = {x, y};
1904 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1905 ? ColumnNames_t()
1906 : ColumnNames_t(columnViews.begin(), columnViews.end());
1907
1908 const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1909
1910 // We build a default name and title based on the input columns
1911 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
1912 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
1913 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1914 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
1915 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
1916
1917 return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph, fProxiedPtr);
1918 }
1919
1920 ////////////////////////////////////////////////////////////////////////////
1921 /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*).
1922 /// \param[in] x The name of the column that will fill the x axis.
1923 /// \param[in] y The name of the column that will fill the y axis.
1924 /// \param[in] exl The name of the column of X low errors
1925 /// \param[in] exh The name of the column of X high errors
1926 /// \param[in] eyl The name of the column of Y low errors
1927 /// \param[in] eyh The name of the column of Y high errors
1928 /// \return the TGraphAsymmErrors wrapped in a RResultPtr.
1929 ///
1930 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1931 /// is filled with each one of the elements of the container.
1932 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1933 ///
1934 /// This action is *lazy*: upon invocation of this method the calculation is
1935 /// booked but not executed. Also see RResultPtr.
1936 ///
1937 /// ### Example usage:
1938 /// ~~~{.cpp}
1939 /// // Deduce column types (this invocation needs jitting internally)
1940 /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
1941 /// // Explicit column types
1942 /// using f = float;
1943 /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
1944 /// ~~~
1945 ///
1946 /// \note Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory
1947 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1948 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1949 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType,
1950 typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType,
1951 typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType>
1953 GraphAsymmErrors(std::string_view x = "", std::string_view y = "", std::string_view exl = "",
1954 std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "")
1955 {
1956 auto graph = std::make_shared<::TGraphAsymmErrors>();
1957 const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh};
1958 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1959 ? ColumnNames_t()
1960 : ColumnNames_t(columnViews.begin(), columnViews.end());
1961
1962 const auto validatedColumns = GetValidatedColumnNames(6, userColumns);
1963
1964 // We build a default name and title based on the input columns
1965 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
1966 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
1967 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1968 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
1969 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
1970
1971 return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph,
1973 }
1974
1975 ////////////////////////////////////////////////////////////////////////////
1976 /// \brief Fill and return a one-dimensional profile (*lazy action*).
1977 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1978 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1979 /// \param[in] model The model to be considered to build the new return value.
1980 /// \param[in] v1Name The name of the column that will fill the x axis.
1981 /// \param[in] v2Name The name of the column that will fill the y axis.
1982 /// \return the monodimensional profile wrapped in a RResultPtr.
1983 ///
1984 /// This action is *lazy*: upon invocation of this method the calculation is
1985 /// booked but not executed. Also see RResultPtr.
1986 ///
1987 /// ### Example usage:
1988 /// ~~~{.cpp}
1989 /// // Deduce column types (this invocation needs jitting internally)
1990 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1991 /// // Explicit column types
1992 /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1993 /// ~~~
1994 ///
1995 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
1996 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1997 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1998 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2000 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2001 {
2002 std::shared_ptr<::TProfile> h(nullptr);
2003 {
2004 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2005 h = model.GetProfile();
2006 }
2007
2008 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2009 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
2010 }
2011 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2012 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2013 ? ColumnNames_t()
2014 : ColumnNames_t(columnViews.begin(), columnViews.end());
2015 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h, fProxiedPtr);
2016 }
2017
2018 ////////////////////////////////////////////////////////////////////////////
2019 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2020 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2021 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2022 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
2023 /// \param[in] model The model to be considered to build the new return value.
2024 /// \param[in] v1Name The name of the column that will fill the x axis.
2025 /// \param[in] v2Name The name of the column that will fill the y axis.
2026 /// \param[in] wName The name of the column that will provide the weights.
2027 /// \return the monodimensional profile wrapped in a RResultPtr.
2028 ///
2029 /// This action is *lazy*: upon invocation of this method the calculation is
2030 /// booked but not executed. Also see RResultPtr.
2031 ///
2032 /// ### Example usage:
2033 /// ~~~{.cpp}
2034 /// // Deduce column types (this invocation needs jitting internally)
2035 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
2036 /// // Explicit column types
2037 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
2038 /// "xValues", "yValues", "weight");
2039 /// ~~~
2040 ///
2041 /// See the first Profile1D() overload for more details.
2042 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2043 typename W = RDFDetail::RInferredType>
2045 Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2046 {
2047 std::shared_ptr<::TProfile> h(nullptr);
2048 {
2049 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2050 h = model.GetProfile();
2051 }
2052
2053 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2054 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
2055 }
2056 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2057 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2058 ? ColumnNames_t()
2059 : ColumnNames_t(columnViews.begin(), columnViews.end());
2060 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
2061 }
2062
2063 ////////////////////////////////////////////////////////////////////////////
2064 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2065 /// See the first Profile1D() overload for more details.
2066 template <typename V1, typename V2, typename W>
2068 {
2069 return Profile1D<V1, V2, W>(model, "", "", "");
2070 }
2071
2072 ////////////////////////////////////////////////////////////////////////////
2073 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2074 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2075 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2076 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2077 /// \param[in] model The returned profile will be constructed using this as a model.
2078 /// \param[in] v1Name The name of the column that will fill the x axis.
2079 /// \param[in] v2Name The name of the column that will fill the y axis.
2080 /// \param[in] v3Name The name of the column that will fill the z axis.
2081 /// \return the bidimensional profile wrapped in a RResultPtr.
2082 ///
2083 /// This action is *lazy*: upon invocation of this method the calculation is
2084 /// booked but not executed. Also see RResultPtr.
2085 ///
2086 /// ### Example usage:
2087 /// ~~~{.cpp}
2088 /// // Deduce column types (this invocation needs jitting internally)
2089 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2090 /// "xValues", "yValues", "zValues");
2091 /// // Explicit column types
2092 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2093 /// "xValues", "yValues", "zValues");
2094 /// ~~~
2095 ///
2096 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2097 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2098 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2099 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2100 typename V3 = RDFDetail::RInferredType>
2101 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "",
2102 std::string_view v2Name = "", std::string_view v3Name = "")
2103 {
2104 std::shared_ptr<::TProfile2D> h(nullptr);
2105 {
2106 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2107 h = model.GetProfile();
2108 }
2109
2110 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2111 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2112 }
2113 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2114 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2115 ? ColumnNames_t()
2116 : ColumnNames_t(columnViews.begin(), columnViews.end());
2117 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
2118 }
2119
2120 ////////////////////////////////////////////////////////////////////////////
2121 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2122 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2123 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2124 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2125 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2126 /// \param[in] model The returned histogram will be constructed using this as a model.
2127 /// \param[in] v1Name The name of the column that will fill the x axis.
2128 /// \param[in] v2Name The name of the column that will fill the y axis.
2129 /// \param[in] v3Name The name of the column that will fill the z axis.
2130 /// \param[in] wName The name of the column that will provide the weights.
2131 /// \return the bidimensional profile wrapped in a RResultPtr.
2132 ///
2133 /// This action is *lazy*: upon invocation of this method the calculation is
2134 /// booked but not executed. Also see RResultPtr.
2135 ///
2136 /// ### Example usage:
2137 /// ~~~{.cpp}
2138 /// // Deduce column types (this invocation needs jitting internally)
2139 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2140 /// "xValues", "yValues", "zValues", "weight");
2141 /// // Explicit column types
2142 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2143 /// "xValues", "yValues", "zValues", "weight");
2144 /// ~~~
2145 ///
2146 /// See the first Profile2D() overload for more details.
2147 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2148 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2149 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name,
2150 std::string_view v3Name, std::string_view wName)
2151 {
2152 std::shared_ptr<::TProfile2D> h(nullptr);
2153 {
2154 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2155 h = model.GetProfile();
2156 }
2157
2158 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2159 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2160 }
2161 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2162 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2163 ? ColumnNames_t()
2164 : ColumnNames_t(columnViews.begin(), columnViews.end());
2165 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
2166 }
2167
2168 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2169 /// See the first Profile2D() overload for more details.
2170 template <typename V1, typename V2, typename V3, typename W>
2172 {
2173 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
2174 }
2175
2176 ////////////////////////////////////////////////////////////////////////////
2177 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
2178 ///
2179 /// Type T must provide at least:
2180 /// - a copy-constructor
2181 /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
2182 /// (these types can also be passed as template parameters to this method)
2183 /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
2184 /// objects passed as argument into the object on which `Merge` was called (an analogous of TH1::Merge). Note that
2185 /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
2186 /// the TCollection*).
2187 ///
2188 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
2189 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
2190 /// \tparam T The type of the object to fill. Automatically deduced.
2191 /// \param[in] model The model to be considered to build the new return value.
2192 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2193 /// \return the filled object wrapped in a RResultPtr.
2194 ///
2195 /// The user gives up ownership of the model object.
2196 /// The list of column names to be used for filling must always be specified.
2197 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
2198 /// Also see RResultPtr.
2199 ///
2200 /// ### Example usage:
2201 /// ~~~{.cpp}
2202 /// MyClass obj;
2203 /// // Deduce column types (this invocation needs jitting internally, and in this case
2204 /// // MyClass needs to be known to the interpreter)
2205 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
2206 /// // explicit column types
2207 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
2208 /// ~~~
2209 ///
2210 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
2211 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
2212 {
2213 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
2214 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
2215 throw std::runtime_error("The absence of axes limits is not supported yet.");
2216 }
2217 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h, fProxiedPtr,
2218 columnList.size());
2219 }
2220
2221 ////////////////////////////////////////////////////////////////////////////
2222 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2223 ///
2224 /// \tparam V The type of the value column
2225 /// \param[in] value The name of the column with the values to fill the statistics with.
2226 /// \return the filled TStatistic object wrapped in a RResultPtr.
2227 ///
2228 /// ### Example usage:
2229 /// ~~~{.cpp}
2230 /// // Deduce column type (this invocation needs jitting internally)
2231 /// auto stats0 = myDf.Stats("values");
2232 /// // Explicit column type
2233 /// auto stats1 = myDf.Stats<float>("values");
2234 /// ~~~
2235 ///
2236 template <typename V = RDFDetail::RInferredType>
2237 RResultPtr<TStatistic> Stats(std::string_view value = "")
2238 {
2239 ColumnNames_t columns;
2240 if (!value.empty()) {
2241 columns.emplace_back(std::string(value));
2242 }
2243 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2244 if (std::is_same<V, RDFDetail::RInferredType>::value) {
2245 return Fill(TStatistic(), validColumnNames);
2246 } else {
2247 return Fill<V>(TStatistic(), validColumnNames);
2248 }
2249 }
2250
2251 ////////////////////////////////////////////////////////////////////////////
2252 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2253 ///
2254 /// \tparam V The type of the value column
2255 /// \tparam W The type of the weight column
2256 /// \param[in] value The name of the column with the values to fill the statistics with.
2257 /// \param[in] weight The name of the column with the weights to fill the statistics with.
2258 /// \return the filled TStatistic object wrapped in a RResultPtr.
2259 ///
2260 /// ### Example usage:
2261 /// ~~~{.cpp}
2262 /// // Deduce column types (this invocation needs jitting internally)
2263 /// auto stats0 = myDf.Stats("values", "weights");
2264 /// // Explicit column types
2265 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
2266 /// ~~~
2267 ///
2268 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2269 RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight)
2270 {
2271 ColumnNames_t columns{std::string(value), std::string(weight)};
2272 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2273 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2274 const auto validColumnNames = GetValidatedColumnNames(2, columns);
2275 // We have 3 cases:
2276 // 1. Both types are inferred: we use Fill and let the jit kick in.
2277 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
2278 // 3. Both types are explicit: we invoke the fully compiled Fill method.
2279 if (vIsInferred && wIsInferred) {
2280 return Fill(TStatistic(), validColumnNames);
2281 } else if (vIsInferred != wIsInferred) {
2282 std::string error("The ");
2283 error += vIsInferred ? "value " : "weight ";
2284 error += "column type is explicit, while the ";
2285 error += vIsInferred ? "weight " : "value ";
2286 error += " is specified to be inferred. This case is not supported: please specify both types or none.";
2287 throw std::runtime_error(error);
2288 } else {
2289 return Fill<V, W>(TStatistic(), validColumnNames);
2290 }
2291 }
2292
2293 ////////////////////////////////////////////////////////////////////////////
2294 /// \brief Return the minimum of processed column values (*lazy action*).
2295 /// \tparam T The type of the branch/column.
2296 /// \param[in] columnName The name of the branch/column to be treated.
2297 /// \return the minimum value of the selected column wrapped in a RResultPtr.
2298 ///
2299 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2300 /// template specialization of this method.
2301 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2302 ///
2303 /// This action is *lazy*: upon invocation of this method the calculation is
2304 /// booked but not executed. Also see RResultPtr.
2305 ///
2306 /// ### Example usage:
2307 /// ~~~{.cpp}
2308 /// // Deduce column type (this invocation needs jitting internally)
2309 /// auto minVal0 = myDf.Min("values");
2310 /// // Explicit column type
2311 /// auto minVal1 = myDf.Min<double>("values");
2312 /// ~~~
2313 ///
2314 template <typename T = RDFDetail::RInferredType>
2315 RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "")
2316 {
2317 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2318 using RetType_t = RDFDetail::MinReturnType_t<T>;
2319 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
2320 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV, fProxiedPtr);
2321 }
2322
2323 ////////////////////////////////////////////////////////////////////////////
2324 /// \brief Return the maximum of processed column values (*lazy action*).
2325 /// \tparam T The type of the branch/column.
2326 /// \param[in] columnName The name of the branch/column to be treated.
2327 /// \return the maximum value of the selected column wrapped in a RResultPtr.
2328 ///
2329 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2330 /// template specialization of this method.
2331 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2332 ///
2333 /// This action is *lazy*: upon invocation of this method the calculation is
2334 /// booked but not executed. Also see RResultPtr.
2335 ///
2336 /// ### Example usage:
2337 /// ~~~{.cpp}
2338 /// // Deduce column type (this invocation needs jitting internally)
2339 /// auto maxVal0 = myDf.Max("values");
2340 /// // Explicit column type
2341 /// auto maxVal1 = myDf.Max<double>("values");
2342 /// ~~~
2343 ///
2344 template <typename T = RDFDetail::RInferredType>
2345 RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "")
2346 {
2347 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2348 using RetType_t = RDFDetail::MaxReturnType_t<T>;
2349 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
2350 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV, fProxiedPtr);
2351 }
2352
2353 ////////////////////////////////////////////////////////////////////////////
2354 /// \brief Return the mean of processed column values (*lazy action*).
2355 /// \tparam T The type of the branch/column.
2356 /// \param[in] columnName The name of the branch/column to be treated.
2357 /// \return the mean value of the selected column wrapped in a RResultPtr.
2358 ///
2359 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2360 /// template specialization of this method.
2361 ///
2362 /// This action is *lazy*: upon invocation of this method the calculation is
2363 /// booked but not executed. Also see RResultPtr.
2364 ///
2365 /// ### Example usage:
2366 /// ~~~{.cpp}
2367 /// // Deduce column type (this invocation needs jitting internally)
2368 /// auto meanVal0 = myDf.Mean("values");
2369 /// // Explicit column type
2370 /// auto meanVal1 = myDf.Mean<double>("values");
2371 /// ~~~
2372 ///
2373 template <typename T = RDFDetail::RInferredType>
2374 RResultPtr<double> Mean(std::string_view columnName = "")
2375 {
2376 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2377 auto meanV = std::make_shared<double>(0);
2378 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV, fProxiedPtr);
2379 }
2380
2381 ////////////////////////////////////////////////////////////////////////////
2382 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
2383 /// \tparam T The type of the branch/column.
2384 /// \param[in] columnName The name of the branch/column to be treated.
2385 /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
2386 ///
2387 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2388 /// template specialization of this method.
2389 ///
2390 /// This action is *lazy*: upon invocation of this method the calculation is
2391 /// booked but not executed. Also see RResultPtr.
2392 ///
2393 /// ### Example usage:
2394 /// ~~~{.cpp}
2395 /// // Deduce column type (this invocation needs jitting internally)
2396 /// auto stdDev0 = myDf.StdDev("values");
2397 /// // Explicit column type
2398 /// auto stdDev1 = myDf.StdDev<double>("values");
2399 /// ~~~
2400 ///
2401 template <typename T = RDFDetail::RInferredType>
2402 RResultPtr<double> StdDev(std::string_view columnName = "")
2403 {
2404 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2405 auto stdDeviationV = std::make_shared<double>(0);
2406 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV, fProxiedPtr);
2407 }
2408
2409 // clang-format off
2410 ////////////////////////////////////////////////////////////////////////////
2411 /// \brief Return the sum of processed column values (*lazy action*).
2412 /// \tparam T The type of the branch/column.
2413 /// \param[in] columnName The name of the branch/column.
2414 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
2415 /// \return the sum of the selected column wrapped in a RResultPtr.
2416 ///
2417 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2418 /// template specialization of this method.
2419 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2420 ///
2421 /// This action is *lazy*: upon invocation of this method the calculation is
2422 /// booked but not executed. Also see RResultPtr.
2423 ///
2424 /// ### Example usage:
2425 /// ~~~{.cpp}
2426 /// // Deduce column type (this invocation needs jitting internally)
2427 /// auto sum0 = myDf.Sum("values");
2428 /// // Explicit column type
2429 /// auto sum1 = myDf.Sum<double>("values");
2430 /// ~~~
2431 ///
2432 template <typename T = RDFDetail::RInferredType>
2434 Sum(std::string_view columnName = "",
2435 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2436 {
2437 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2438 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
2439 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV, fProxiedPtr);
2440 }
2441 // clang-format on
2442
2443 ////////////////////////////////////////////////////////////////////////////
2444 /// \brief Gather filtering statistics.
2445 /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2446 ///
2447 /// Calling `Report` on the main `RDataFrame` object gathers stats for
2448 /// all named filters in the call graph. Calling this method on a
2449 /// stored chain state (i.e. a graph node different from the first) gathers
2450 /// the stats for all named filters in the chain section between the original
2451 /// `RDataFrame` and that node (included). Stats are gathered in the same
2452 /// order as the named filters have been added to the graph.
2453 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
2454 /// effects cuts had.
2455 ///
2456 /// This action is *lazy*: upon invocation of
2457 /// this method the calculation is booked but not executed. See RResultPtr
2458 /// documentation.
2459 ///
2460 /// ### Example usage:
2461 /// ~~~{.cpp}
2462 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
2463 /// auto cutReport = filtered.Report();
2464 /// cutReport->Print();
2465 /// ~~~
2466 ///
// NOTE(review): the embedded numbering of this listing has gaps right before this body (2466 -> 2468) and inside
// it (2479 -> 2481, 2482 -> 2484). The declaration line of this method, presumably the definition of `Action_t`,
// and the tail of the `std::make_unique` call are not visible here; restore them from the original header.
2468 {
2469 bool returnEmptyReport = false;
2470 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
2471 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
2472 // certainly does not contain named filters.
2473 // The number 4 takes into account the implicit columns for entry and slot number
2474 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
2475 if (std::is_same<Proxied, RLoopManager>::value && fColRegister.GetNames().size() > 4)
2476 returnEmptyReport = true;
2477
// `rep` is the object handed back to the caller (wrapped in the result pointer) and also given to the helper.
2478 auto rep = std::make_shared<RCutFlowReport>();
2479 using Helper_t = RDFInternal::ReportHelper<Proxied>;
2481
2482 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr.get(), returnEmptyReport), ColumnNames_t({}),
2484
2485 return MakeResultPtr(rep, *fLoopManager, std::move(action));
2486 }
2487
2488 /// \brief Returns the names of the filters created.
2489 /// \return the container of filters names.
2490 ///
2491 /// If called on a root node, all the filters in the computation graph will
2492 /// be printed. For any other node, only the filters upstream of that node.
2493 /// Filters without a name are printed as "Unnamed Filter"
2494 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2495 ///
2496 /// ### Example usage:
2497 /// ~~~{.cpp}
2498 /// auto filtNames = d.GetFilterNames();
2499 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
2500 /// ~~~
2501 ///
2502 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
2503
2504 // clang-format off
2505 ////////////////////////////////////////////////////////////////////////////
2506 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2507 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2508 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2509 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2510 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2511 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2512 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2513 /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted)
2514 /// \return the result of the aggregation wrapped in a RResultPtr.
2515 ///
2516 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2517 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2518 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2519 /// the value of the column columnName.
2520 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2521 /// Otherwise the signature of aggregator must be `void(U&,T)`.
2522 ///
2523 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2524 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2525 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2526 ///
2527 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2528 ///
2529 /// Example usage:
2530 /// ~~~{.cpp}
2531 /// auto aggregator = [](double acc, double x) { return acc * x; };
2532 /// ROOT::EnableImplicitMT();
2533 /// // If multithread is enabled, the aggregator function will be called by more threads
2534 /// // and will produce a vector of partial accumulators.
2535 /// // The merger function performs the final aggregation of these partial results.
2536 /// auto merger = [](std::vector<double> &accumulators) {
2537 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2538 /// accumulators[0] *= accumulators[i];
2539 /// }
2540 /// };
2541 ///
2542 /// // The accumulator is initialized at this value by every thread.
2543 /// double initValue = 1.;
2544 ///
2545 /// // Multiplies all elements of the column "x"
2546 /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2547 /// ~~~
2548 // clang-format on
// NOTE(review): the embedded numbering of this listing skips 2564 and 2567. `Action_t` is used below but its
// definition is not visible, and the `std::make_unique` call is not closed; restore both lines from the original
// header.
2549 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2550 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2551 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2552 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2553 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2554 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2555 {
// Check the aggregator/merger pair before anything is booked.
2556 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
// An empty name is forwarded as an empty column list; otherwise the list holds exactly that one name.
2557 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2558
// Exactly one input column is requested, of type T (the second parameter of the aggregator).
2559 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2560 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
2561
// The aggregation result starts as a copy of aggIdentity and is shared with the helper.
2562 auto accObjPtr = std::make_shared<U>(aggIdentity);
2563 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2565 auto action = std::make_unique<Action_t>(
2566 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2568 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2569 }
2570
2571 // clang-format off
2572 ////////////////////////////////////////////////////////////////////////////
2573 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2574 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2575 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2576 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2577 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2578 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2579 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2580 /// \return the result of the aggregation wrapped in a RResultPtr.
2581 ///
2582 /// See previous Aggregate overload for more information.
2583 // clang-format on
2584 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2585 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2586 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2587 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2588 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2589 {
2590 static_assert(
2591 std::is_default_constructible<U>::value,
2592 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2593 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2594 }
2595
2596 // clang-format off
2597 ////////////////////////////////////////////////////////////////////////////
2598 /// \brief Book execution of a custom action using a user-defined helper object.
2599 /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present.
2600 /// \tparam OtherColumns A list of the types of the other columns used by this action
2601 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2602 /// \param[in] helper The Action Helper to be scheduled.
2603 /// \param[in] columns The names of the columns on which the helper acts.
2604 /// \return the result of the helper wrapped in a RResultPtr.
2605 ///
2606 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2607 /// Helper object provided by the caller. The required interface for the helper is described below (more
2608 /// methods than the ones required can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2609 ///
2610 /// ### Mandatory interface
2611 ///
2612 /// * `Helper` must publicly inherit from `ROOT::Detail::RDF::RActionImpl<Helper>`
2613 /// * `Helper::Result_t`: public alias for the type of the result of this action helper. `Result_t` must be default-constructible.
2614 /// * `Helper(Helper &&)`: a move-constructor is required. Copy-constructors are discouraged.
2615 /// * `std::shared_ptr<Result_t> GetResultPtr() const`: return a shared_ptr to the result of this action (of type
2616 /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
2617 /// _before_ Initialize(), because the RResultPtr is constructed before the event loop is started.
2618 /// * `void Initialize()`: this method is called once before starting the event-loop. Useful for setup operations.
2619 /// It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper,
2620 /// or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
2621 /// * `void InitTask(TTreeReader *, unsigned int slot)`: each working thread shall call this method during the event
2622 /// loop, before processing a batch of entries. The pointer passed as argument, if not null, will point to the TTreeReader
2623 /// that RDataFrame has set up to read the task's batch of entries. It is passed to the helper to allow certain advanced optimizations;
2624 /// it should not usually serve any purpose for the Helper. This method is often a no-op for simple helpers.
2625 /// * `void Exec(unsigned int slot, ColumnTypes...columnValues)`: each working thread shall call this method
2626 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2627 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2628 /// the requested columns for the particular entry being processed.
2629 /// * `void Finalize()`: this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2630 /// * `std::string GetActionName()`: it returns a string identifier for this type of action that RDataFrame will use in
2631 /// diagnostics, SaveGraph(), etc.
2632 ///
2633 /// ### Optional methods
2634 ///
2635 /// If these methods are implemented they enable extra functionality as per the description below.
2636 ///
2637 /// * `Result_t &PartialUpdate(unsigned int slot)`: if present, it must return the value of the partial result of this action for the given 'slot'.
2638 /// Different threads might call this method concurrently, but will do so with different 'slot' numbers.
2639 /// RDataFrame leverages this method to implement RResultPtr::OnPartialResult().
2640 /// * `ROOT::RDF::SampleCallback_t GetSampleCallback()`: if present, it must return a callable with the
2641 /// appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing
2642 /// of every sample, as in DefinePerSample().
2643 /// * `Helper MakeNew(void *newResult)`: if implemented, it enables varying the action's result with VariationsFor(). It takes a
2644 /// type-erased new result that can be safely cast to a `std::shared_ptr<Result_t> *` (a pointer to shared pointer) and should
2645 /// be used as the action's output result.
2646 ///
2647 /// In case Book is called without specifying column types as template arguments, corresponding typed code will be just-in-time compiled
2648 /// by RDataFrame. In that case the Helper class needs to be known to the ROOT interpreter.
2649 ///
2650 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2651 ///
2652 /// ### Examples
2653 /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper.
2654 ///
2655 /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx.
2656 ///
2657 // clang-format on
// NOTE(review): the embedded numbering of this listing skips 2659 and 2663. The declaration line (which introduces
// `helper` and `columns`) and the definition of `AH` used in the static_assert are not visible here; restore them
// from the original header.
2658 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
2660 {
2661 using HelperT = std::decay_t<Helper>;
2662 // TODO add more static sanity checks on Helper
2664 static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value,
2665 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2666
// The helper is moved/copied into shared storage; its result pointer is fetched before the action is created.
2667 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper));
2668 auto resPtr = hPtr->GetResultPtr();
2669
// No column types and no column names given: try to book the helper as an action without input columns.
2670 if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) {
2671 return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
2672 } else {
2673 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
2674 fProxiedPtr, columns.size());
2675 }
2676 }
2677
2678 ////////////////////////////////////////////////////////////////////////////
2679 /// \brief Provides a representation of the columns in the dataset.
2680 /// \tparam ColumnTypes variadic list of branch/column types.
2681 /// \param[in] columnList Names of the columns to be displayed.
2682 /// \param[in] nRows Number of events for each column to be displayed.
2683 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2684 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2685 ///
2686 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2687 /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will
2688 /// return a complete version through `RDisplay::AsString()`.
2689 ///
2690 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
2691 /// RResultPtr.
2692 ///
2693 /// Example usage:
2694 /// ~~~{.cpp}
2695 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2696 /// auto d1 = rdf.Display("");
2697 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2698 /// auto d2 = d.Display({"x", "y"}, 128);
2699 /// // Printing the short representations, the event loop will run
2700 /// d1->Print();
2701 /// d2->Print();
2702 /// ~~~
2703 template <typename... ColumnTypes>
2704 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
2705 {
2706 CheckIMTDisabled("Display");
2707 auto newCols = columnList;
2708 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
2709 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
2710 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
2711 // Need to add ULong64_t type corresponding to the first column rdfentry_
2712 return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>(
2713 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr);
2714 }
2715
2716 ////////////////////////////////////////////////////////////////////////////
2717 /// \brief Provides a representation of the columns in the dataset.
2718 /// \param[in] columnList Names of the columns to be displayed.
2719 /// \param[in] nRows Number of events for each column to be displayed.
2720 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2721 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2722 ///
2723 /// This overload automatically infers the column types.
2724 /// See the previous overloads for further details.
2725 ///
2726 /// Invoked when no types are specified to Display
2727 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
2728 {
2729 CheckIMTDisabled("Display");
2730 auto newCols = columnList;
2731 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
2732 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
2733 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
2734 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(
2735 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr,
2736 columnList.size() + 1);
2737 }
2738
2739 ////////////////////////////////////////////////////////////////////////////
2740 /// \brief Provides a representation of the columns in the dataset.
2741 /// \param[in] columnNameRegexp A regular expression to select the columns.
2742 /// \param[in] nRows Number of events for each column to be displayed.
2743 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2744 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2745 ///
2746 /// The existing columns are matched against the regular expression. If the string provided
2747 /// is empty, all columns are selected.
2748 /// See the previous overloads for further details.
2750 Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10)
2751 {
2752 const auto columnNames = GetColumnNames();
2753 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
2754 return Display(selectedColumns, nRows, nMaxCollectionElements);
2755 }
2756
2757 ////////////////////////////////////////////////////////////////////////////
2758 /// \brief Provides a representation of the columns in the dataset.
2759 /// \param[in] columnList Names of the columns to be displayed.
2760 /// \param[in] nRows Number of events for each column to be displayed.
2761 /// \param[in] nMaxCollectionElements Number of maximum elements in collection.
2762 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2763 ///
2764 /// See the previous overloads for further details.
2766 Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
2767 {
2768 ColumnNames_t selectedColumns(columnList);
2769 return Display(selectedColumns, nRows, nMaxCollectionElements);
2770 }
2771
2772private:
// Implementation shared by the Define-like methods for callables whose return type is default-constructible.
// NOTE(review): the embedded numbering of this listing skips 2778-2780, 2782-2784, 2809 and 2811. The bodies of
// both branches of the first `if`, the tail of the `std::make_shared` call and the declaration of `newCols` are not
// visible here; restore them from the original header.
2773 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
2774 std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>
2775 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
2776 {
2777 if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
2781 } else {
2785 }
2786
// Column types are the callable's argument types, minus the leading slot (or slot and entry) parameters when
// DefineType says the callable takes them.
2787 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
2788 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
2789 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::Slot>::value, ArgTypes_t>::type;
2790 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
2791 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::SlotAndEntry>::value, ColTypesTmp_t>::type;
2792
2793 constexpr auto nColumns = ColTypes_t::list_size;
2794
2795 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2796 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
2797
2798 // Declare return type to the interpreter, for future use by jitted actions
2799 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2800 if (retTypeName.empty()) {
2801 // The type is not known to the interpreter.
2802 // We must not error out here, but rather if/when this column is used in jitted code,
// so a recognizable placeholder type name is stored instead.
2803 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2804 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
2805 }
2806
2807 using NewCol_t = RDFDetail::RDefine<F, DefineType>;
2808 auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
2810
2812 newCols.AddDefine(std::move(newColumn));
2813
// The returned interface proxies the same node, with the new column added to its register.
2814 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
2815
2816 return newInterface;
2817 }
2818
2819 // This overload is chosen when the callable passed to Define or DefineSlot returns void.
2820 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
2821 // this way compilation of `Define` has no way to continue after throwing the error.
2822 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
2823 bool IsFStringConv = std::is_convertible<F, std::string>::value,
2824 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
2825 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
2826 DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &)
2827 {
2828 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
2829 "Error in `Define`: type returned by expression is not default-constructible");
2830 return *this; // never reached
2831 }
2832
// Implementation of Snapshot for explicitly specified column types.
// NOTE(review): the embedded numbering of this listing skips 2842-2843 and 2852; whatever statements were on those
// lines are not visible here. Restore them from the original header.
2833 template <typename... ColumnTypes>
2834 RResultPtr<RInterface<RLoopManager>> SnapshotImpl(std::string_view fullTreeName, std::string_view filename,
2835 const ColumnNames_t &columnList, const RSnapshotOptions &options)
2836 {
2837 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
2838
// The number of template types must match the number of columns left after filtering.
2839 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size());
2840 // validCols has aliases resolved, while columnListWithoutSizeColumns still has aliases in it.
2841 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
2844
// Split the full tree name into its directory and tree components.
2845 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
2846 const auto &treename = parsedTreePath.fTreeName;
2847 const auto &dirname = parsedTreePath.fDirName;
2848
2849 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
2850 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
2851
// The result of the action is a new RDataFrame constructed from the output tree name and file name.
2853 auto newRDF =
2854 std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, /*defaultColumns=*/columnListWithoutSizeColumns);
2855
2856 // The Snapshot helper will use validCols (with aliases resolved) as input columns, and
2857 // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column names.
2858 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs,
2859 fProxiedPtr);
2860
// Non-lazy snapshot: the result pointer is dereferenced right away (presumably to force execution now --
// confirm against RResultPtr).
2861 if (!options.fLazy)
2862 *resPtr;
2863 return resPtr;
2864 }
2865
2866 ////////////////////////////////////////////////////////////////////////////
2867 /// \brief Implementation of cache.
2868 template <typename... ColTypes, std::size_t... S>
2869 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
2870 {
2871 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
2872
2873 // Check at compile time that the columns types are copy constructible
2874 constexpr bool areCopyConstructible =
2875 RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value;
2876 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
2877
2878 RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
2879
2880 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
2881 auto ds = std::make_unique<RLazyDS<ColTypes...>>(
2882 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
2883
2884 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
2885
2886 return cachedRDF;
2887 }
2888
// Implementation of Vary for compiled (non-jitted) expressions.
// NOTE(review): the embedded numbering of this listing skips 2890 and 2916. The return type of this method and the
// declaration of `newCols` are not visible here; restore them from the original header.
2889 template <bool IsSingleColumn, typename F>
2891 VaryImpl(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
2892 const std::vector<std::string> &variationTags, std::string_view variationName)
2893 {
2894 using F_t = std::decay_t<F>;
2895 using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types;
2896 using RetType = typename TTraits::CallableTraits<F_t>::ret_type;
2897 constexpr auto nColumns = ColTypes_t::list_size;
2898
2899 SanityChecksForVary<RetType>(colNames, variationTags, variationName);
2900
2901 const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns);
2902 CheckAndFillDSColumns(validColumnNames, ColTypes_t{});
2903
2904 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2905 if (retTypeName.empty()) {
2906 // The type is not known to the interpreter, but we don't want to error out
2907 // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name.
2908 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2909 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
2910 }
2911
2912 auto variation = std::make_shared<RDFInternal::RVariation<F_t, IsSingleColumn>>(
2913 colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager,
2914 validColumnNames);
2915
2917 newCols.AddVariation(std::move(variation));
2918
// The returned interface proxies the same node, with the new variation added to its register.
2919 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
2920
2921 return newInterface;
2922 }
2923
// Implementation of Vary for string expressions that are compiled just-in-time.
// NOTE(review): the embedded numbering of this listing skips 2934-2935 and 2951. Part of the body of the loop over
// `colNames` and the declaration of `newColRegister` are not visible here; restore them from the original header.
2924 RInterface<Proxied, DS_t> JittedVaryImpl(const std::vector<std::string> &colNames, std::string_view expression,
2925 const std::vector<std::string> &variationTags,
2926 std::string_view variationName, bool isSingleColumn)
2927 {
2928 R__ASSERT(variationTags.size() > 0 && "Must have at least one variation.");
2929 R__ASSERT(colNames.size() > 0 && "Must have at least one varied column.");
2930 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
2931
// Column names and the variation name are checked with CheckValidCppVarName.
2932 for (auto &colName : colNames) {
2933 RDFInternal::CheckValidCppVarName(colName, "Vary");
2936 }
2937 RDFInternal::CheckValidCppVarName(variationName, "Vary");
2938
2939 // when varying multiple columns, they must be different columns
2940 if (colNames.size() > 1) {
2941 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
2942 if (uniqueCols.size() != colNames.size())
2943 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
2944 }
2945
2946 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
2947 auto jittedVariation =
2948 RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager, fDataSource,
2949 fColRegister, fLoopManager->GetBranchNames(), upcastNodeOnHeap, isSingleColumn);
2950
2952 newColRegister.AddVariation(std::move(jittedVariation));
2953
// The returned interface proxies the same node, with the new variation added to its register.
2954 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister));
2955
2956 return newInterface;
2957 }
2958
// Overload taking part in overload resolution only if `hPtr->Exec(0u)` is a valid expression, i.e. if the helper
// can be executed with the slot number as its only argument: it books the action without input columns.
// NOTE(review): the embedded numbering of this listing skips 2962, so the last parameter of this declaration is
// not visible here; restore it from the original header.
2959 template <typename Helper, typename ActionResultType>
2960 auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
2961 const std::shared_ptr<Helper> &hPtr,
2963 -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
2964 {
2965 return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, fProxiedPtr, 0u);
2966 }
2967
2968 template <typename Helper, typename ActionResultType, typename... Others>
2969 RResultPtr<ActionResultType>
2970 CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
2971 const std::shared_ptr<Helper>& /*hPtr*/,
2972 Others...)
2973 {
2974 throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
2975 "columns! The action helper type was ") +
2976 typeid(Helper).name());
2977 return {};
2978 }
2979
2980protected:
2981 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
2982 const RDFInternal::RColumnRegister &colRegister)
2983 : RInterfaceBase(lm, colRegister), fProxiedPtr(proxied)
2984 {
2985 }
2986
2987 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
2988};
2989
2990} // namespace RDF
2991
2992} // namespace ROOT
2993
2994#endif // ROOT_RDF_TINTERFACE
#define f(i)
Definition RSha256.hxx:104
#define h(i)
Definition RSha256.hxx:106
unsigned int UInt_t
Definition RtypesCore.h:46
unsigned long long ULong64_t
Definition RtypesCore.h:81
#define X(type, name)
#define R__ASSERT(e)
Definition TError.h:118
constexpr Int_t kError
Definition TError.h:46
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
Base class for action helpers, see RInterface::Book() for more information.
The head node of a RDF computation graph.
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
void Run(bool jit=true)
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
Helper class that provides the operation graph nodes.
A RDataFrame node that produces a result.
Definition RAction.hxx:53
A binder for user-defined columns, variations and aliases.
void AddVariation(std::shared_ptr< RVariationBase > variation)
Register a new systematic variation.
void AddDefine(std::shared_ptr< RDFDetail::RDefineBase > column)
Add a new defined column.
ColumnNames_t GetNames() const
Return the list of the names of the defined columns (Defines + Aliases).
A dataset specification for RDataFrame.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
RDFDetail::RLoopManager * fLoopManager
The RLoopManager at the root of this computation graph. Never null.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
Create RAction object, return RResultPtr for the action. Overload for the case in which all column typ...
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
void CheckIMTDisabled(std::string_view callerName)
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
The public interface to the RDataFrame federation of classes.
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &colRegister)
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
std::enable_if_t<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > > DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing column using auto-generated variation tags.
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing column.
RResultPtr<::TGraph > Graph(std::string_view x="", std::string_view y="")
Fill and return a TGraph object (lazy action).
RResultPtr< ActionResultType > CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &, const std::shared_ptr< Helper > &, Others...)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot.
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action).
std::enable_if_t< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > > DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
RInterface & operator=(RInterface &&)=default
Move-assignment operator for RInterface.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns using auto-generated tags.
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Redefine(std::string_view name, std::string_view expression)
Overwrite the value and/or type of an existing column.
auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &resPtr, const std::shared_ptr< Helper > &hPtr, TTraits::TypeList< RDFDetail::RInferredType >) -> decltype(hPtr->Exec(0u), RResultPtr< ActionResultType >{})
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view fullTreeName, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
RInterface< Proxied, DS_t > Vary(std::initializer_list< std::string > colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
Overload to avoid ambiguity between C++20 string, vector<string> construction from init list.
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< RLoopManager > &proxied)
Build a RInterface from a RLoopManager.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, std::string_view expression)
Define a new column that is updated when the input sample changes.
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow referring to a column by a different name.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Redefine(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
RInterface< Proxied, DS_t > VaryImpl(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
RResultPtr< typename std::decay_t< Helper >::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action).
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column.
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for one or more existing columns.
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Define a new column.
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
Register a systematic variation that affects multiple columns simultaneously.
RInterface< Proxied, DS_t > RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... >)
Implementation of cache.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action).
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RInterface< Proxied, DS_t > JittedVaryImpl(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName, bool isSingleColumn)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action).
RInterface< Proxied, DS_t > Vary(std::initializer_list< std::string > colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
Overload to avoid ambiguity between C++20 string, vector<string> construction from init list.
RResultPtr< std::decay_t< T > > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RInterface< Proxied, DS_t > RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action).
RResultPtr<::TGraphAsymmErrors > GraphAsymmErrors(std::string_view x="", std::string_view y="", std::string_view exl="", std::string_view exh="", std::string_view eyl="", std::string_view eyh="")
Fill and return a TGraphAsymmErrors object (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot and the current entry.
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action).
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
Fill and return a two-dimensional profile (lazy action).
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action).
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action).
RInterface< Proxied, DS_t > Vary(std::initializer_list< std::string > colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
Overload to avoid ambiguity between C++20 string, vector<string> construction from init list.
A RDataSource implementation which is built on top of result proxies.
Smart pointer for the return type of actions.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree,...
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
TDirectory::TContext keeps track of and restores the current directory.
Definition TDirectory.h:89
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
Statistical variable, defined by its mean and variance (RMS).
Definition TStatistic.h:33
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
#define F(x, y, z)
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair< ULong64_t, ULong64_t > &&newRange)
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
void ChangeSpec(const ROOT::RDF::RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec)
Changes the input dataset specification of an RDataFrame.
void RemoveDuplicates(ColumnNames_t &columnNames)
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:99
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
void TriggerRun(ROOT::RDF::RNode node)
Trigger the execution of an RDataFrame computation graph.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
std::string DemangleTypeIdName(const std::type_info &typeInfo)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting with '#' filtered out.
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap, bool isSingleColumn)
Book the jitting of a Vary call.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(const std::vector< std::string > &branches, TTree *tree, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
std::vector< std::string > GetTopLevelBranchNames(TTree &t)
Get all the top-level branch names, including the ones of the friend trees.
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
std::vector< std::string > ColumnNames_t
ROOT type_traits extensions.
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:539
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:570
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition TROOT.cxx:556
Definition graph.py:1
Definition tree.py:1
type is TypeList if MustRemove is false, otherwise it is a TypeList with the first type removed
Definition Utils.hxx:139
A collection of options to steer the creation of the dataset on file.
bool fLazy
Do not start the event loop when Snapshot is called.
A struct which stores the parameters of a TH1D.
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores the parameters of a TH2D.
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores the parameters of a TH3D.
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores the parameters of a THnD.
std::shared_ptr<::THnD > GetHistogram() const
A struct which stores the parameters of a TProfile.
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores the parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.