Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/InternalTreeUtils.hxx" // for GetFileNamesFromTree and GetFriendInfo
15#include "ROOT/RDataSource.hxx"
20#include "ROOT/RDF/RDefine.hxx"
22#include "ROOT/RDF/RFilter.hxx"
26#include "ROOT/RDF/RRange.hxx"
27#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/RResultPtr.hxx"
32#include "ROOT/RStringView.hxx"
33#include "ROOT/RVec.hxx"
34#include "ROOT/TypeTraits.hxx"
35#include "RtypesCore.h" // for ULong64_t
36#include "TChain.h" // for checking fLoopManager->GetTree() return type
37#include "TDirectory.h"
38#include "TH1.h" // For Histo actions
39#include "TH2.h" // For Histo actions
40#include "TH3.h" // For Histo actions
41#include "THn.h"
42#include "TProfile.h"
43#include "TProfile2D.h"
44#include "TStatistic.h"
45
46#include <algorithm>
47#include <cstddef>
48#include <initializer_list>
49#include <iterator> // std::back_inserter
50#include <limits>
51#include <memory>
52#include <set>
53#include <sstream>
54#include <stdexcept>
55#include <string>
56#include <type_traits> // is_same, enable_if
57#include <typeinfo>
58#include <unordered_set>
59#include <utility> // std::index_sequence
60#include <vector>
61
62class TGraph;
63
64// Windows requires a forward declaration of cling::printValue before RInterface can name it as a friend function
65namespace ROOT {
68void EnableImplicitMT(UInt_t numthreads);
69class RDataFrame;
70namespace Internal {
71namespace RDF {
73} // namespace RDF
74} // namespace Internal
75} // namespace ROOT
76namespace cling {
77std::string printValue(ROOT::RDataFrame *tdf);
78} // namespace cling
79
80namespace ROOT {
81namespace RDF {
// Shorthand for the ROOT::TypeTraits namespace.
84namespace TTraits = ROOT::TypeTraits;
85
// Forward declaration so that the RNode alias below can name RInterface.
86template <typename Proxied, typename DataSource>
87class RInterface;
88
// RNode is RInterface instantiated on the RNodeBase node type, with `void` as the data-source type.
89using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
90
91// clang-format off
92/**
93 * \class ROOT::RDF::RInterface
94 * \ingroup dataframe
95 * \brief The public interface to the RDataFrame federation of classes.
96 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
97 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
98 *
99 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
100 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
101 */
102// clang-format on
103template <typename Proxied, typename DataSource = void>
// Alias through which the members refer to the DataSource template parameter.
105 using DS_t = DataSource;
109 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
111
// All RInterface instantiations are friends of one another.
112 template <typename T, typename W>
113 friend class RInterface;
114
// Friend access for RDFInternal::TriggerRun, which takes an RNode by reference.
115 friend void RDFInternal::TriggerRun(RNode &node);
116
117 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
// NOTE(review): the member declarations documented by the next three comments (listing lines 119, 121, 124) are
// missing from this listing; code further down refers to members named fLoopManager, fDataSource and fColRegister.
118 ///< The RLoopManager at the root of this computation graph. Never null.
120 /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
122
123 /// Contains the columns defined up to this node.
125
/// \brief Build a human-readable, possibly multi-line description of the input of this dataframe.
/// \return a string describing, in order of precedence: the TTree/TChain being read (its type, name, whether it is
///         in-memory or which files it comes from, plus its friends), the label of the data-source, or the number
///         of rows of an empty dataframe.
126 std::string DescribeDataset() const
127 {
128 // Case 1: TTree/TChain as input
129 const auto tree = fLoopManager->GetTree();
130 if (tree) {
131 const auto treeName = tree->GetName();
132 const auto isTChain = dynamic_cast<TChain *>(tree) ? true : false;
133 const auto treeType = isTChain ? "TChain" : "TTree";
// A tree that is not a TChain and has no current file is reported as in-memory.
134 const auto isInMemory = !isTChain && !tree->GetCurrentFile() ? true : false;
135 const auto friendInfo = ROOT::Internal::TreeUtils::GetFriendInfo(*tree);
136 const auto hasFriends = friendInfo.fFriendNames.empty() ? false : true;
137 std::stringstream ss;
138 ss << "Dataframe from " << treeType << " " << treeName;
139 if (isInMemory) {
140 ss << " (in-memory)";
141 } else {
// NOTE(review): the declaration of `files` (listing line 142) is missing from this listing; presumably it is
// obtained via ROOT::Internal::TreeUtils::GetFileNamesFromTree -- confirm against the upstream header.
143 const auto numFiles = files.size();
144 if (numFiles == 1) {
145 ss << " in file " << files[0];
146 } else {
147 ss << " in files\n";
148 for (auto i = 0u; i < numFiles; i++) {
149 ss << " " << files[i];
// newline between entries, but not after the last one
150 if (i < numFiles - 1)
151 ss << '\n';
152 }
153 }
154 }
155 if (hasFriends) {
156 const auto numFriends = friendInfo.fFriendNames.size();
157 if (numFriends == 1) {
158 ss << "\nwith friend\n";
159 } else {
160 ss << "\nwith friends\n";
161 }
// The i-th entries of fFriendNames, fFriendFileNames and fFriendChainSubNames are read together for each friend.
162 for (auto i = 0u; i < numFriends; i++) {
163 const auto nameAlias = friendInfo.fFriendNames[i];
164 const auto files = friendInfo.fFriendFileNames[i];
165 const auto numFiles = files.size();
166 const auto subnames = friendInfo.fFriendChainSubNames[i];
167 ss << " " << nameAlias.first;
// print the second element of the pair in parentheses only when it differs from the first
168 if (nameAlias.first != nameAlias.second)
169 ss << " (" << nameAlias.second << ")";
170 // case: TTree as friend (exactly one file)
171 if (numFiles == 1) {
172 ss << " " << files[0];
173 }
174 // case: TChain as friend: one "subname file" pair per line
175 else {
176 ss << '\n';
177 for (auto j = 0u; j < numFiles; j++) {
178 ss << " " << subnames[j] << " " << files[j];
179 if (j < numFiles - 1)
180 ss << '\n';
181 }
182 }
183 if (i < numFriends - 1)
184 ss << '\n';
185 }
186 }
187 return ss.str();
188 }
189 // Case 2: datasource as input
190 else if (fDataSource) {
191 const auto datasourceLabel = fDataSource->GetLabel();
192 return "Dataframe from datasource " + datasourceLabel;
193 }
194 // Case 3: trivial/empty datasource, described by the number of empty entries
195 else {
196 const auto n = fLoopManager->GetNEmptyEntries();
197 if (n == 1) {
198 return "Empty dataframe filling 1 row";
199 } else {
200 return "Empty dataframe filling " + std::to_string(n) + " rows";
201 }
202 }
203 }
204
205public:
206 ////////////////////////////////////////////////////////////////////////////
207 /// \brief Copy-assignment operator for RInterface (compiler-generated).
208 RInterface &operator=(const RInterface &) = default;
209
210 ////////////////////////////////////////////////////////////////////////////
211 /// \brief Copy-ctor for RInterface (compiler-generated).
212 RInterface(const RInterface &) = default;
213
214 ////////////////////////////////////////////////////////////////////////////
215 /// \brief Move-ctor for RInterface (compiler-generated).
216 RInterface(RInterface &&) = default;
217
218 ////////////////////////////////////////////////////////////////////////////
219 /// \brief Move-assignment operator for RInterface.
221
222 ////////////////////////////////////////////////////////////////////////////
223 /// \brief Build an RInterface from an RLoopManager.
224 /// \param[in] proxied Shared pointer to the RLoopManager. It is dereferenced in the member-initializer list,
224 /// so it must not be null.
224 ///
224 /// This constructor is only available for RInterface<RLoopManager>: the enable_if in the template parameter
224 /// list requires Proxied to be RLoopManager.
225 template <typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>>
226 RInterface(const std::shared_ptr<RLoopManager> &proxied)
227 : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource()), fColRegister(proxied)
228 {
// NOTE(review): the constructor body (listing line 229) is missing from this listing.
230 }
231
232 ////////////////////////////////////////////////////////////////////////////
233 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
234 /// Different RDataFrame methods return different C++ types. All nodes, however,
235 /// can be cast to this common type at the cost of a small performance penalty.
236 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
237 /// around via (non-template, C++11) helper functions.
238 /// Example usage:
239 /// ~~~{.cpp}
240 /// // a function that conditionally adds a Range to a RDataFrame node.
241 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
242 /// {
243 /// return mustAddRange ? df.Range(1) : df;
244 /// }
245 /// // use as :
246 /// ROOT::RDataFrame df(10);
247 /// auto maybeRanged = MaybeAddRange(df, true);
248 /// ~~~
249 /// Note that it is not a problem to pass RNode's by value.
250 operator RNode() const
251 {
// The proxied node pointer is cast to a pointer to the RNodeBase base type; loop manager and column register
// are passed on unchanged. NOTE(review): the end of this statement (listing line 253) is missing from this listing.
252 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister,
254 }
255
256 ////////////////////////////////////////////////////////////////////////////
257 /// \brief Append a filter to the call graph.
258 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
259 /// signalling whether the event has passed the selection (true) or not (false).
260 /// \param[in] columns Names of the columns/branches in input to the filter function.
261 /// \param[in] name Optional name of this filter. See `Report`.
262 /// \return the filter node of the computation graph.
263 ///
264 /// Append a filter node at the point of the call graph corresponding to the
265 /// object this method is called on.
266 /// The callable `f` should not have side-effects (e.g. modification of an
267 /// external or static variable) to ensure correct results when implicit
268 /// multi-threading is active.
269 ///
270 /// RDataFrame only evaluates filters when necessary: if multiple filters
271 /// are chained one after another, they are executed in order and the first
272 /// one returning false causes the event to be discarded.
273 /// Even if multiple actions or transformations depend on the same filter,
274 /// it is executed once per entry. If its result is requested more than
275 /// once, the cached result is served.
276 ///
277 /// ### Example usage:
278 /// ~~~{.cpp}
279 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
280 /// auto filtered = df.Filter(myCut, {"x", "y"});
281 ///
282 /// // String: it must contain valid C++ except that column names can be used instead of variable names
283 /// auto filtered = df.Filter("x*y > 0");
284 /// ~~~
// The enable_if constraint disables this template when F is convertible to std::string.
285 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
// NOTE(review): the return type (listing line 286) is missing from this listing.
287 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
288 {
289 RDFInternal::CheckFilter(f);
// The argument types of the callable determine how many column names are needed.
290 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
291 constexpr auto nColumns = ColTypes_t::list_size;
292 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
293 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
294
// NOTE(review): the definition of F_t (listing line 295) is missing from this listing.
296
// The new filter node takes ownership of the callable and has the current node (fProxiedPtr) as its upstream node.
297 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name);
298 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister, fDataSource);
299 }
300
301 ////////////////////////////////////////////////////////////////////////////
302 /// \brief Append a filter to the call graph.
303 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
304 /// signalling whether the event has passed the selection (true) or not (false).
305 /// \param[in] name Optional name of this filter. See `Report`.
306 /// \return the filter node of the computation graph.
307 ///
308 /// Refer to the first overload of this method for the full documentation.
309 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
// NOTE(review): the signature of this overload (listing line 310) is missing from this listing.
311 {
312 // The SFINAE constraint on F disables this template when the first argument is convertible to std::string,
313 // so that the overloaded method which accepts two strings is picked up instead of this template method.
// Delegate to the main overload with an empty list of column names.
314 return Filter(f, {}, name);
315 }
316
317 ////////////////////////////////////////////////////////////////////////////
318 /// \brief Append a filter to the call graph.
319 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
320 /// signalling whether the event has passed the selection (true) or not (false).
321 /// \param[in] columns Names of the columns/branches in input to the filter function.
322 /// \return the filter node of the computation graph.
323 ///
324 /// Refer to the first overload of this method for the full documentation.
325 template <typename F>
326 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
327 {
// Convert the initializer list to ColumnNames_t and delegate to the main overload.
// `f` is a by-value parameter that is not used again here, so it is moved rather than copied a second time.
328 return Filter(std::move(f), ColumnNames_t{columns});
329 }
330
331 ////////////////////////////////////////////////////////////////////////////
332 /// \brief Append a filter to the call graph.
333 /// \param[in] expression The filter expression in C++
334 /// \param[in] name Optional name of this filter. See `Report`.
335 /// \return the filter node of the computation graph.
336 ///
337 /// The expression is just-in-time compiled and used to filter entries. It must
338 /// be valid C++ syntax in which variable names are substituted with the names
339 /// of branches/columns.
340 ///
341 /// ### Example usage:
342 /// ~~~{.cpp}
343 /// auto filtered_df = df.Filter("myCollection.size() > 3");
344 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
345 /// ~~~
346 RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "")
347 {
348 // upcastNodeOnHeap is allocated on the heap here and deleted by the jitted call to JitFilterHelper
349 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
// BaseNodeType_t is the element type of the shared pointer that upcastNodeOnHeap points to.
350 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
351 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister, fDataSource);
// NOTE(review): the initializer of jittedFilter and the return statement (listing lines 353-357) are missing
// from this listing.
352 const auto jittedFilter =
355
358 }
359
360 // clang-format off
361 ////////////////////////////////////////////////////////////////////////////
362 /// \brief Define a new column.
363 /// \param[in] name The name of the defined column.
364 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
365 /// \param[in] columns Names of the columns/branches in input to the producer function.
366 /// \return the first node of the computation graph for which the new quantity is defined.
367 ///
368 /// Define a column that will be visible from all subsequent nodes
369 /// of the functional chain. The `expression` is only evaluated for entries that pass
370 /// all the preceding filters.
371 /// A new variable is created called `name`, accessible as if it was contained
372 /// in the dataset from subsequent transformations/actions.
373 ///
374 /// Use cases include:
375 /// * caching the results of complex calculations for easy and efficient multiple access
376 /// * extraction of quantities of interest from complex objects
377 ///
378 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
379 ///
380 /// ### Example usage:
381 /// ~~~{.cpp}
382 /// // assuming a function with signature:
383 /// double myComplexCalculation(const RVec<float> &muon_pts);
384 /// // we can pass it directly to Define
385 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
386 /// // alternatively, we can pass the body of the function as a string, as in Filter:
387 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
388 /// ~~~
// The enable_if constraint disables this template when F is convertible to std::string.
389 template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
390 RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
391 {
// Delegate to DefineImpl (defined outside this excerpt) with the CustomColExtraArgs::None tag;
// the trailing string is the name of the calling method.
392 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns, "Define");
393 }
394 // clang-format on
395
396 // clang-format off
397 ////////////////////////////////////////////////////////////////////////////
398 /// \brief Define a new column with a value dependent on the processing slot.
399 /// \param[in] name The name of the defined column.
400 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
401 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
402 /// \return the first node of the computation graph for which the new quantity is defined.
403 ///
404 /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
405 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
406 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
407 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
408 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
409 ///
410 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
411 /// ~~~{.cpp}
412 /// int function(unsigned int, double, double);
413 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
414 /// df.DefineSlot("x", function, {"column1", "column2"})
415 /// ~~~
416 ///
417 /// See Define for more information.
418 template <typename F>
419 RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
420 {
// Delegate to DefineImpl (defined outside this excerpt) with the CustomColExtraArgs::Slot tag;
// the trailing string is the name of the calling method.
421 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns, "DefineSlot");
422 }
423 // clang-format on
424
425 // clang-format off
426 ////////////////////////////////////////////////////////////////////////////
427 /// \brief Define a new column with a value dependent on the processing slot and the current entry.
428 /// \param[in] name The name of the defined column.
429 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
430 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
431 /// \return the first node of the computation graph for which the new quantity is defined.
432 ///
433 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
434 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
435 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
436 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
437 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
438 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
439 ///
440 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
441 /// ~~~{.cpp}
442 /// int function(unsigned int, ULong64_t, double, double);
443 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
444 /// DefineSlotEntry("x", function, {"column1", "column2"})
445 /// ~~~
446 ///
447 /// See Define for more information.
448 template <typename F>
449 RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
450 {
// Delegate to DefineImpl (defined outside this excerpt) with the CustomColExtraArgs::SlotAndEntry tag;
// the trailing string is the name of the calling method.
451 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns,
452 "DefineSlotEntry");
453 }
454 // clang-format on
455
456 ////////////////////////////////////////////////////////////////////////////
457 /// \brief Define a new column.
458 /// \param[in] name The name of the defined column.
459 /// \param[in] expression An expression in C++ which represents the defined value
460 /// \return the first node of the computation graph for which the new quantity is defined.
461 ///
462 /// The expression is just-in-time compiled and used to produce the column entries.
463 /// It must be valid C++ syntax in which variable names are substituted with the names
464 /// of branches/columns.
465 ///
466 /// Refer to the first overload of this method for the full documentation.
467 RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression)
468 {
// Name of the calling method. NOTE(review): the statements that use `where` (listing lines 470, 472-473)
// are missing from this listing.
469 constexpr auto where = "Define";
471 // these checks must be done before jitting lest we throw exceptions in jitted code
474
// Book the just-in-time compiled column on an upcast, heap-allocated copy of the pointer to the current node.
475 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
476 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
477 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
478
// NOTE(review): the declaration of newCols (listing line 479) is missing from this listing.
480 newCols.AddColumn(jittedDefine);
481
// The returned interface wraps the same node (fProxiedPtr) together with the extended set of columns.
482 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
483
484 return newInterface;
485 }
486
487 ////////////////////////////////////////////////////////////////////////////
488 /// \brief Overwrite the value and/or type of an existing column.
489 /// \param[in] name The name of the column to redefine.
490 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
491 /// \param[in] columns Names of the columns/branches in input to the expression.
492 /// \return the first node of the computation graph for which the quantity is redefined.
493 ///
494 /// The old value of the column can be used as an input for the expression.
495 ///
496 /// An exception is thrown in case the column to redefine does not already exist.
497 /// See Define() for more information.
// The enable_if constraint disables this template when F is convertible to std::string.
498 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
499 RInterface<Proxied, DS_t> Redefine(std::string_view name, F expression, const ColumnNames_t &columns = {})
500 {
// Same delegation as Define, except that the calling-method name passed to DefineImpl is "Redefine".
501 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns, "Redefine");
502 }
503
504 // clang-format off
505 ////////////////////////////////////////////////////////////////////////////
506 /// \brief Overwrite the value and/or type of an existing column.
507 /// \param[in] name The name of the column to redefine.
508 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
509 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
510 /// \return the first node of the computation graph for which the new quantity is defined.
511 ///
512 /// The old value of the column can be used as an input for the expression.
513 /// An exception is thrown in case the column to redefine does not already exist.
514 ///
515 /// See DefineSlot() for more information.
516 // clang-format on
517 template <typename F>
518 RInterface<Proxied, DS_t> RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
519 {
// Same delegation as DefineSlot, except that the calling-method name passed to DefineImpl is "RedefineSlot".
520 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns, "RedefineSlot");
521 }
522
523 // clang-format off
524 ////////////////////////////////////////////////////////////////////////////
525 /// \brief Overwrite the value and/or type of an existing column.
526 /// \param[in] name The name of the column to redefine.
527 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
528 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
529 /// \return the first node of the computation graph for which the new quantity is defined.
530 ///
531 /// The old value of the column can be used as an input for the expression.
532 /// An exception is thrown in case the column to re-define does not already exist.
533 ///
534 /// See DefineSlotEntry() for more information.
535 // clang-format on
536 template <typename F>
537 RInterface<Proxied, DS_t> RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
538 {
// Same delegation as DefineSlotEntry, except that the calling-method name passed to DefineImpl is
// "RedefineSlotEntry".
539 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns,
540 "RedefineSlotEntry");
541 }
542
543 ////////////////////////////////////////////////////////////////////////////
544 /// \brief Overwrite the value and/or type of an existing column.
545 /// \param[in] name The name of the column to redefine.
546 /// \param[in] expression An expression in C++ which represents the defined value
547 /// \return the first node of the computation graph for which the new quantity is defined.
548 ///
549 /// The expression is just-in-time compiled and used to produce the column entries.
550 /// It must be valid C++ syntax in which variable names are substituted with the names
551 /// of branches/columns.
552 ///
553 /// The old value of the column can be used as an input for the expression.
554 /// An exception is thrown in case the column to re-define does not already exist.
555 ///
556 /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
557 RInterface<Proxied, DS_t> Redefine(std::string_view name, std::string_view expression)
558 {
// Name of the calling method. NOTE(review): the statements that use `where` (listing lines 560-563)
// are missing from this listing.
559 constexpr auto where = "Redefine";
564
// Book the just-in-time compiled column on an upcast, heap-allocated copy of the pointer to the current node.
565 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
566 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
567 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
568
// NOTE(review): the declaration of newCols (listing line 569) is missing from this listing.
570 newCols.AddColumn(jittedDefine);
571
// The returned interface wraps the same node (fProxiedPtr) together with the updated set of columns.
572 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
573
574 return newInterface;
575 }
576
577 ////////////////////////////////////////////////////////////////////////////
578 /// \brief Define a new column that is updated when the input sample changes.
579 /// \param[in] name The name of the defined column.
580 /// \param[in] expression A C++ callable that computes the new value of the defined column.
581 /// \return the first node of the computation graph for which the new quantity is defined.
582 ///
583 /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
584 /// where:
585 /// - `T` is the type of the defined column
586 /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
587 /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
588 /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
589 /// being processed (see the class docs for more information).
590 ///
591 /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
592 /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
593 /// starts rather than at every entry.
594 ///
595 /// ### Example usage:
596 /// ~~~{.cpp}
597 /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
598 /// df.DefinePerSample("weightbysample",
599 /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
600 /// { return id.Contains("sample1") ? 1.0f : 2.0f; });
601 /// ~~~
602 // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
// RetType_t defaults to the return type of the callable F.
603 template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
604 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, F expression)
605 {
606 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
// NOTE(review): listing lines 607-608 are missing from this listing.
609
610 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
611 if (retTypeName.empty()) {
612 // The type is not known to the interpreter.
613 // We must not error out here: a placeholder name built from the demangled type is recorded instead.
614 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
615 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
616 }
617
618 auto newColumn =
619 std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager);
620
// NOTE(review): the declaration of newCols (listing line 621) is missing from this listing.
622 newCols.AddColumn(std::move(newColumn));
// Spell out DS_t so that the local has exactly the declared return type, as in the other Define* overloads.
623 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
624 return newInterface;
625 }
626
627 ////////////////////////////////////////////////////////////////////////////
628 /// \brief Define a new column that is updated when the input sample changes.
629 /// \param[in] name The name of the defined column.
630 /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
631 /// \return the first node of the computation graph for which the new quantity is defined.
632 ///
633 /// The expression is just-in-time compiled and used to produce the column entries.
634 /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is
635 /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the
636 /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
637 ///
638 /// ### Example usage:
639 /// ~~~{.py}
640 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
641 /// df.DefinePerSample("weightbysample", "rdfsampleinfo_.Contains('sample1') ? 1.0f : 2.0f")
642 /// ~~~
643 ///
644 /// \note
645 /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
646 /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
647 /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
648 /// ~~~{.py}
649 /// ROOT.gInterpreter.Declare(
650 /// """
651 /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
652 /// return id.Contains("sample1") ? 1.0f : 2.0f;
653 /// }
654 /// """)
655 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
656 /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
657 /// ~~~
658 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, std::string_view expression)
659 {
660 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
661 // these checks must be done before jitting lest we throw exceptions in jitted code
// NOTE(review): the checks themselves (listing lines 662-663) are missing from this listing.
664
// Book the just-in-time compiled column on an upcast, heap-allocated copy of the pointer to the current node.
665 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
666 auto jittedDefine =
667 RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, upcastNodeOnHeap);
668
// NOTE(review): the declaration of newCols (listing line 669) is missing from this listing.
670 newCols.AddColumn(jittedDefine);
671
// The returned interface wraps the same node (fProxiedPtr) together with the extended set of columns.
672 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
673
674 return newInterface;
675 }
676
677 /// \brief Register systematic variations for an existing column.
678 /// \param[in] colName name of the column for which varied values are provided.
679 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
680 /// take any column values as input, similarly to what happens with Filter and Define calls. It must
681 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
682 /// \param[in] inputColumns the names of the columns to be passed to the callable.
683 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
684 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
685 ///
686 /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to
687 /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for
688 /// results that depend on any varied quantity a map/dictionary of varied results can be produced with
689 /// ROOT::RDF::Experimental::VariationsFor (see the example below).
690 ///
691 /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and
692 /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or
693 /// indirect dependencies of the column values on some registered variations). The keys will be a composition of
694 /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below.
695 ///
696 /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt.
697 /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):
698 /// ~~~{.cpp}
699 /// auto nominal_hx =
700 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"down", "up"})
701 /// .Filter("pt > k")
702 /// .Define("x", someFunc, {"pt"})
703 /// .Histo1D("x");
704 ///
705 /// auto hx = ROOT::RDF::VariationsFor(nominal_hx);
706 /// hx["nominal"].Draw();
707 /// hx["pt:down"].Draw("SAME");
708 /// ~~~
709 template <typename F>
710 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
711 std::size_t nVariations, std::string_view variationName = "")
712 {
713 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
714
715 std::vector<std::string> variationTags;
716 variationTags.reserve(nVariations);
717 for (std::size_t i = 0u; i < nVariations; ++i)
718 variationTags.emplace_back(std::to_string(i));
719
720 const std::string theVariationName{variationName.empty() ? colName : variationName};
721
722 return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName);
723 }
724
725 /// \brief Register systematic variations for an existing columns using auto-generated variation tags.
726 /// This overload of Vary takes a nVariations parameter instead of a list of tag names. Tag names
727 /// will be auto-generated as the sequence 0...nVariations-1.
728 /// See the documentation of the previous overload for more information.
729 template <typename F>
730 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
731 const std::vector<std::string> &variationTags, std::string_view variationName = "")
732 {
733 std::vector<std::string> colNames{{std::string(colName)}};
734 const std::string theVariationName{variationName.empty() ? colName : variationName};
735
736 return Vary(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags, theVariationName);
737 }
738
739 /// \brief Register a systematic variation that affects multiple columns simultaneously.
740 /// This overload of Vary takes a list of column names as first argument rather than a single name and
741 /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each
742 /// affected column.
743 /// See the documentation of the first Vary overload for more information.
744 ///
745 /// Example usage:
746 /// ~~~{.cpp}
747 /// // produce variations "ptAndEta:down" and "ptAndEta:up"
748 /// df.Vary({"pt", "eta"},
749 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
750 /// {"pt", "eta"}, {"down", "up"},
751 /// "ptAndEta");
752 /// ~~~
753 template <typename F>
// NOTE(review): listing line 754 (presumably the return type) is absent from this extract.
755 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
756 const std::vector<std::string> &variationTags, std::string_view variationName)
757 {
// Callable-based Vary for one or more columns. The body validates the arguments, checks that the
// callable's return type is compatible with the type of the varied column(s), creates an RVariation
// and returns an interface built from the column register the variation was added to.
758 /* SANITY CHECKS BEGIN */
759 R__ASSERT(variationTags.size() > 0 && "Must have at least one variation.");
760 R__ASSERT(colNames.size() > 0 && "Must have at least one varied column.");
761 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
762
763 for (auto &colName : colNames) {
764 RDFInternal::CheckValidCppVarName(colName, "Vary");
// NOTE(review): listing lines 765-766 are absent from this extract; any further per-column check is not visible.
767 }
768 RDFInternal::CheckValidCppVarName(variationName, "Vary");
769
// Extract the callable's argument types; their count is the number of input columns to validate.
770 using F_t = std::decay_t<F>;
771 using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types;
772 constexpr auto nColumns = ColTypes_t::list_size;
773 const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns);
774 CheckAndFillDSColumns(validColumnNames, ColTypes_t{});
775
// The return type of the callable must be an RVec: this is enforced at compile time.
776 using RetType = typename TTraits::CallableTraits<F_t>::ret_type;
777 static_assert(RDFInternal::IsRVec<RetType>::value, "Vary expressions must return an RVec.");
778
// Multi-column case: an RVec of RVecs is required and all varied columns must share one type.
779 if (colNames.size() > 1) {
// NOTE(review): `hasInnerRVec` is defined on listing line 780, which is absent from this extract.
781 if (!hasInnerRVec)
782 throw std::runtime_error("This Vary call is varying multiple columns simultaneously but the expression "
783 "does not return an RVec of RVecs.");
784
// Compare every column type name (from the second one on) with the first one.
785 auto colTypes = GetColumnTypeNamesList(colNames);
786 auto allColTypesEqual =
787 std::all_of(colTypes.begin() + 1, colTypes.end(), [&](const std::string &t) { return t == colTypes[0]; });
788 if (!allColTypesEqual)
789 throw std::runtime_error("Cannot simultaneously vary multiple columns of different types.");
790
791 const auto &innerTypeID = typeid(RDFInternal::InnerValueType_t<RetType>);
792
// For each varied column: use the type id stored in the column register when the column is found there,
// otherwise derive it from the column's type name; it must match the inner value type of the expression.
793 const auto &definesMap = fColRegister.GetColumns();
794 for (auto i = 0u; i < colTypes.size(); ++i) {
795 const auto it = definesMap.find(colNames[i]);
796 const auto &expectedTypeID =
797 it == definesMap.end() ? RDFInternal::TypeName2TypeID(colTypes[i]) : it->second->GetTypeId();
798 if (innerTypeID != expectedTypeID)
799 throw std::runtime_error("Varied values for column \"" + colNames[i] + "\" have a different type (" +
800 RDFInternal::TypeID2TypeName(innerTypeID) + ") than the nominal value (" +
801 colTypes[i] + ").");
802 }
803 } else {
// Single-column case: the RVec's value_type must match the type of the nominal column.
804 const auto &retTypeID = typeid(typename RetType::value_type);
805 const auto &colName = colNames[0]; // we have only one element in there
806 const auto &definesMap = fColRegister.GetColumns();
807 const auto it = definesMap.find(colName);
808 const auto &expectedTypeID =
809 it == definesMap.end() ? RDFInternal::TypeName2TypeID(GetColumnType(colName)) : it->second->GetTypeId();
810 if (retTypeID != expectedTypeID)
811 throw std::runtime_error("Varied values for column \"" + colName + "\" have a different type (" +
812 RDFInternal::TypeID2TypeName(retTypeID) + ") than the nominal value (" +
813 GetColumnType(colName) + ").");
814 }
815
816 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
817 if (retTypeName.empty()) {
818 // The type is not known to the interpreter, but we don't want to error out
819 // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name.
820 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
821 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
822 }
823
824 // when varying multiple columns, they must be different columns
825 if (colNames.size() > 1) {
// A std::set drops duplicates, so a size mismatch means at least one name was repeated.
826 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
827 if (uniqueCols.size() != colNames.size())
// NOTE(review): the message below reads as garbled and ends with ": " but names no column -- consider rewording.
828 throw std::logic_error("The same column was passed multiple times the same column twice: ");
829 }
830 /* SANITY CHECKS END */
831
832 auto variation = std::make_shared<RDFInternal::RVariation<F_t>>(
833 colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager,
834 validColumnNames);
835
// NOTE(review): `newCols` is declared on listing line 836, which is absent from this extract
// (presumably a copy of fColRegister -- confirm).
837 newCols.AddVariation(variation);
838
// NOTE(review): the sibling overloads construct RInterface<Proxied, DS_t>; here DS_t is omitted.
// The return type is not visible in this extract -- confirm the two agree.
839 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
840
841 return newInterface;
842 }
843
844 /// \brief Register systematic variations for one or more existing columns using auto-generated tags.
845 /// This overload of Vary takes a nVariations parameter instead of a list of tag names. Tag names
846 /// will be auto-generated as the sequence 0...nVariations-1.
847 /// See the documentation of the previous overload for more information.
848 template <typename F>
// NOTE(review): listing line 849 (presumably the return type) is absent from this extract.
850 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
851 std::size_t nVariations, std::string_view variationName)
852 {
// Callable-based overload with auto-generated tags: builds the tags "0" ... "nVariations-1"
// and forwards to the Vary overload that receives an explicit list of tags.
853 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
854
855 std::vector<std::string> variationTags;
856 variationTags.reserve(nVariations);
857 for (std::size_t i = 0u; i < nVariations; ++i)
858 variationTags.emplace_back(std::to_string(i));
859
860 return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName);
861 }
862
863 /// \brief Register systematic variations for an existing column.
864 /// \param[in] colName name of the column for which varied values are provided.
865 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
866 /// values for the specified column.
867 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
868 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
869 /// colName is used if none is provided.
870 ///
871 /// ~~~{.cpp}
872 /// auto nominal_hx =
873 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
874 /// .Filter("pt > k")
875 /// .Define("x", someFunc, {"pt"})
876 /// .Histo1D("x");
877 ///
878 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
879 /// hx["nominal"].Draw();
880 /// hx["pt:down"].Draw("SAME");
881 /// ~~~
882 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression,
883 const std::vector<std::string> &variationTags, std::string_view variationName = "")
884 {
885 std::vector<std::string> colNames{{std::string(colName)}};
886 const std::string theVariationName{variationName.empty() ? colName : variationName};
887
888 return Vary(std::move(colNames), expression, variationTags, theVariationName);
889 }
890
891 /// \brief Register systematic variations for an existing column.
892 /// \param[in] colName name of the column for which varied values are provided.
893 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
894 /// values for the specified column.
895 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be "0", "1", etc.
896 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
897 /// colName is used if none is provided.
898 ///
899 /// See the documentation for the previous overload for more information.
900 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
901 std::string_view variationName = "")
902 {
903 std::vector<std::string> colNames{{std::string(colName)}};
904 const std::string theVariationName{variationName.empty() ? colName : variationName};
905
906 return Vary(std::move(colNames), expression, nVariations, theVariationName);
907 }
908
909 /// \brief Register systematic variations for one or more existing columns.
910 /// \param[in] colNames names of the columns for which varied values are provided.
911 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
912 /// values for the specified columns.
913 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be "0", "1", etc.
914 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
915 ///
916 /// ~~~{.cpp}
917 /// auto nominal_hx =
918 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
919 /// .Histo1D("x", "y");
920 ///
921 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
922 /// hx["nominal"].Draw();
923 /// hx["xy:0"].Draw("SAME");
924 /// hx["xy:1"].Draw("SAME");
925 /// ~~~
926 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
927 std::size_t nVariations, std::string_view variationName)
928 {
929 std::vector<std::string> variationTags;
930 variationTags.reserve(nVariations);
931 for (std::size_t i = 0u; i < nVariations; ++i)
932 variationTags.emplace_back(std::to_string(i));
933
934 return Vary(colNames, expression, std::move(variationTags), variationName);
935 }
936
937 /// \brief Register systematic variations for one or more existing columns.
938 /// \param[in] colNames names of the columns for which varied values are provided.
939 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
940 /// values for the specified columns.
941 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
942 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
943 ///
944 /// ~~~{.cpp}
945 /// auto nominal_hx =
946 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
947 /// .Histo1D("x", "y");
948 ///
949 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
950 /// hx["nominal"].Draw();
951 /// hx["xy:down"].Draw("SAME");
952 /// hx["xy:up"].Draw("SAME");
953 /// ~~~
954 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
955 const std::vector<std::string> &variationTags, std::string_view variationName)
956 {
// String-expression Vary for one or more columns: validates the arguments, books a jitted variation
// and returns an interface built from the column register the variation was added to.
957 R__ASSERT(variationTags.size() > 0 && "Must have at least one variation.");
958 R__ASSERT(colNames.size() > 0 && "Must have at least one varied column.");
959 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
960
961 for (auto &colName : colNames) {
962 RDFInternal::CheckValidCppVarName(colName, "Vary");
// NOTE(review): listing lines 963-964 are absent from this extract; any further per-column check is not visible.
965 }
966 RDFInternal::CheckValidCppVarName(variationName, "Vary");
967
968 // when varying multiple columns, they must be different columns
969 if (colNames.size() > 1) {
// A std::set drops duplicates, so a size mismatch means at least one name was repeated.
970 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
971 if (uniqueCols.size() != colNames.size())
// NOTE(review): the message below reads as garbled and ends with ": " but names no column -- consider rewording.
972 throw std::logic_error("The same column was passed multiple times the same column twice: ");
973 }
974
// The upcast node is wrapped by MakeSharedOnHeap and handed to the jitted-variation booking call.
975 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
976 auto jittedVariation =
977 RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager, fDataSource,
978 fColRegister, fLoopManager->GetBranchNames(), upcastNodeOnHeap);
979
// NOTE(review): `newColRegister` is declared on listing line 980, which is absent from this extract
// (presumably a copy of fColRegister -- confirm).
981 newColRegister.AddVariation(std::move(jittedVariation));
982
983 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister), fDataSource);
984
985 return newInterface;
986 }
987
988 ////////////////////////////////////////////////////////////////////////////
989 /// \brief Allow to refer to a column with a different name.
990 /// \param[in] alias name of the column alias
991 /// \param[in] columnName of the column to be aliased
992 /// \return the first node of the computation graph for which the alias is available.
993 ///
994 /// Aliasing an alias is supported.
995 ///
996 /// ### Example usage:
997 /// ~~~{.cpp}
998 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
999 /// ~~~
1000 RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName)
1001 {
1002 // The symmetry with Define is clear. We want to:
1003 // - Create globally the alias and return this very node, unchanged
1004 // - Make aliases accessible based on chains and not globally
1005
1006 // Helper to find out if a name is a column
// Column names of the data source, or an empty list when there is no data source.
// NOTE(review): no line visible in this extract reads dsColumnNames; presumably it is used on one
// of the missing listing lines below -- confirm.
1007 auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1008
1009 constexpr auto where = "Alias";
// NOTE(review): listing lines 1010 and 1012 are absent from this extract, so the checks on the
// alias name (including the one announced by the next comment) are not visible.
1011 // If the alias name is a column name, there is a problem
1013
// Validate the aliased column name; the first (and only) element of the returned list is kept.
1014 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
1015
// NOTE(review): `newCols` is declared on listing line 1016, which is absent from this extract
// (presumably a copy of fColRegister -- confirm).
1017 newCols.AddAlias(alias, validColumnName);
1018
// The returned interface wraps the same proxied node but carries the column register that knows the alias.
1019 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
1020
1021 return newInterface;
1022 }
1023
1024 ////////////////////////////////////////////////////////////////////////////
1025 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1026 /// \tparam ColumnTypes variadic list of branch/column types.
1027 /// \param[in] treename The name of the output TTree.
1028 /// \param[in] filename The name of the output TFile.
1029 /// \param[in] columnList The list of names of the columns/branches to be written.
1030 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1031 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1032 ///
1033 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
1034 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
1035 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
1036 /// written out and it appears before the array in the columnList.
1037 ///
1038 /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other
1039 /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are
1040 /// present, by default all friend top-level branches that have names that do not collide with
1041 /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out
1042 /// friend branches with the same names of branches in the main TTree/TChain with names of the form
1043 /// '<friendname>_<branchname>' in order to differentiate them from the branches in the main tree/chain.
1044 ///
1045 /// ### Writing to a sub-directory
1046 ///
1047 /// Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to
1048 /// the TTree as part of the TTree name, e.g. `df.Snapshot("subdir/t", "f.root")` writes TTree `t` in the
1049 /// sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
1050 ///
1051 /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
1052 /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled with
1053 /// respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in wrong
1054 /// associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
1055 /// error out if such a "shuffled" TTree is used in a friendship.
1056 ///
1057 /// \note In case no events are written out (e.g. because no event passes all filters) the behavior of Snapshot in
1058 /// single-thread and multi-thread runs is different: in single-thread runs, Snapshot will write out a TTree with
1059 /// the specified name and zero entries; in multi-thread runs, no TTree object will be written out to disk.
1060 ///
1061 /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
1062 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1063 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1064 /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
1065 ///
1066 /// ### Example invocations:
1067 ///
1068 /// ~~~{.cpp}
1069 /// // without specifying template parameters (column types automatically deduced)
1070 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
1071 ///
1072 /// // specifying template parameters ("x" is `int`, "y" is `float`)
1073 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
1074 /// ~~~
1075 ///
1076 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
1077 /// `RSnapshotOptions`:
1078 /// ~~~{.cpp}
1079 /// RSnapshotOptions opts;
1080 /// opts.fLazy = true;
1081 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
1082 /// ~~~
1083 template <typename... ColumnTypes>
// NOTE(review): listing line 1084 (presumably the return type) is absent from this extract.
1085 Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
1086 const RSnapshotOptions &options = RSnapshotOptions())
1087 {
// Overload with explicitly specified column types: all arguments are passed unchanged to SnapshotImpl.
1088 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
1089 }
1090
1091 ////////////////////////////////////////////////////////////////////////////
1092 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1093 /// \param[in] treename The name of the output TTree.
1094 /// \param[in] filename The name of the output TFile.
1095 /// \param[in] columnList The list of names of the columns/branches to be written.
1096 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1097 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1098 ///
1099 /// This function returns a `RDataFrame` built with the output tree as a source.
1100 /// The types of the columns are automatically inferred and do not need to be specified.
1101 ///
1102 /// See above for a more complete description and example usages.
1103 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1104 const ColumnNames_t &columnList,
1105 const RSnapshotOptions &options = RSnapshotOptions())
1106 {
// Type-inferring Snapshot: prepares the column lists, books the Snapshot action and, unless
// options.fLazy is set, dereferences the result before returning it.
1107 // like columnList but with `#var` columns removed
1108 auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
1109 // like colListNoPoundSizes but with aliases resolved
1110 auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes);
// NOTE(review): listing line 1111 is absent from this extract.
1112 // like colListNoAliases but with missing size branches required by array branches added in the right positions
1113 const auto pairOfColumnLists =
// NOTE(review): listing line 1114 (the start of the call whose trailing arguments follow) is absent from this extract.
1115 std::move(colListNoAliases), std::move(colListNoPoundSizes));
// first: column names without aliases; second: column names with aliases -- both including the added size branches.
1116 const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first;
1117 const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second;
1118
1119
// Keep the full path as given by the caller, then split it: `treename` is re-pointed to the
// tree-name part and `dirname` refers to the directory part.
1120 const auto fullTreeName = treename;
1121 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
1122 treename = parsedTreePath.fTreeName;
1123 const auto &dirname = parsedTreePath.fDirName;
1124
1125 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(
1126 RDFInternal::SnapshotHelperArgs{std::string(filename), std::string(dirname), std::string(treename),
1127 colListWithAliasesAndSizeBranches, options});
1128
// NOTE(review): listing line 1129 is absent from this extract.
// The dataframe handed back to the caller is built from the full tree path, the output file name
// and the alias-free column list.
1130 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, colListNoAliasesWithSizeBranches);
1131
1132 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
1133 colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, colListNoAliasesWithSizeBranches.size());
1134
// Non-lazy snapshots dereference the result right away; lazy ones leave that to the caller.
1135 if (!options.fLazy)
1136 *resPtr;
1137 return resPtr;
1138 }
1139
1140 // clang-format off
1141 ////////////////////////////////////////////////////////////////////////////
1142 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1143 /// \param[in] treename The name of the output TTree.
1144 /// \param[in] filename The name of the output TFile.
1145 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' at the beginning and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1146 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
1147 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1148 ///
1149 /// This function returns a `RDataFrame` built with the output tree as a source.
1150 /// The types of the columns are automatically inferred and do not need to be specified.
1151 ///
1152 /// See above for a more complete description and example usages.
1153 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1154 std::string_view columnNameRegexp = "",
1155 const RSnapshotOptions &options = RSnapshotOptions())
1156 {
1157 const auto definedColumns = fColRegister.GetNames();
1158 auto *tree = fLoopManager->GetTree();
1159 const auto treeBranchNames = tree != nullptr ? RDFInternal::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1160 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1161 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1162 ColumnNames_t dsColumnsWithoutSizeColumns;
1163 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1164 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1165 ColumnNames_t columnNames;
1166 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
1167 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1168 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1169 columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
1170
1171 // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd.
1172 // RemoveDuplicates should preserve ordering of the columns: it might be meaningful.
1173 RDFInternal::RemoveDuplicates(columnNames);
1174
1175 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
1176 return Snapshot(treename, filename, selectedColumns, options);
1177 }
1178 // clang-format on
1179
1180 // clang-format off
1181 ////////////////////////////////////////////////////////////////////////////
1182 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1183 /// \param[in] treename The name of the output TTree.
1184 /// \param[in] filename The name of the output TFile.
1185 /// \param[in] columnList The list of names of the columns/branches to be written.
1186 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1187 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1188 ///
1189 /// This function returns a `RDataFrame` built with the output tree as a source.
1190 /// The types of the columns are automatically inferred and do not need to be specified.
1191 ///
1192 /// See above for a more complete description and example usages.
1193 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1194 std::initializer_list<std::string> columnList,
1195 const RSnapshotOptions &options = RSnapshotOptions())
1196 {
1197 ColumnNames_t selectedColumns(columnList);
1198 return Snapshot(treename, filename, selectedColumns, options);
1199 }
1200 // clang-format on
1201
1202 ////////////////////////////////////////////////////////////////////////////
1203 /// \brief Save selected columns in memory.
1204 /// \tparam ColumnTypes variadic list of branch/column types.
1205 /// \param[in] columnList columns to be cached in memory.
1206 /// \return a `RDataFrame` that wraps the cached dataset.
1207 ///
1208 /// This action returns a new `RDataFrame` object, completely detached from
1209 /// the originating `RDataFrame`. The new dataframe only contains the cached
1210 /// columns and stores their content in memory for fast, zero-copy subsequent access.
1211 ///
1212 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
1213 /// fits in memory and that will be accessed many times.
1214 ///
1215 /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
1216 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1217 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1218 /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
1219 ///
1220 /// ### Example usage:
1221 ///
1222 /// **Types and columns specified:**
1223 /// ~~~{.cpp}
1224 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
1225 /// ~~~
1226 ///
1227 /// **Types inferred and columns specified (this invocation relies on jitting):**
1228 /// ~~~{.cpp}
1229 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
1230 /// ~~~
1231 ///
1232 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
1233 /// ~~~{.cpp}
1234 /// auto cache_all_cols_df = df.Cache(myRegexp);
1235 /// ~~~
1236 template <typename... ColumnTypes>
// NOTE(review): listing line 1237 (the function signature) is absent from this extract.
1238 {
// Build an index sequence with one index per requested column type and pass it,
// together with the column list, to CacheImpl.
1239 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
1240 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
1241 }
1242
1243 ////////////////////////////////////////////////////////////////////////////
1244 /// \brief Save selected columns in memory.
1245 /// \param[in] columnList columns to be cached in memory
1246 /// \return a `RDataFrame` that wraps the cached dataset.
1247 ///
1248 /// See the previous overloads for more information.
1250 {
// NOTE(review): the signature of this function (listing line 1249) is absent from this extract.
// Type-inferring Cache: assembles, as a string, a call to the templated Cache overload with the
// inferred column types, hands it to the loop manager for jitting and returns the dataframe that
// the jitted statement assigns to.
1251 // Early return: if the list of columns is empty, just return an empty RDF
1252 // If we proceed, the jitted call will not compile!
1253 if (columnList.empty()) {
// The empty dataframe is built from a loop manager constructed with the result of Count() on this node.
1254 auto nEntries = *this->Count();
1255 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
1256 return emptyRDF;
1257 }
1258
1259 std::stringstream cacheCall;
1260 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
1261 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
// NOTE(review): listing line 1262 (the remaining constructor arguments) is absent from this extract.
1263 // build a string equivalent to
1264 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
// resRDF is the object returned at the end of the function.
1265 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
1266 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
// NOTE(review): listing line 1267 is absent from this extract (presumably the address of resRDF is streamed there -- confirm).
1268 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1269 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
1270
1271 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
1272
1273 const auto validColumnNames =
1274 GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
1275 const auto colTypes = GetValidatedArgTypes(validColumnNames, fColRegister, fLoopManager->GetTree(), fDataSource,
1276 "Cache", /*vector2rvec=*/false);
// Emit the column types as a comma-separated template argument list.
1277 for (const auto &colType : colTypes)
1278 cacheCall << colType << ", ";
// Step the write position back by two characters so that the next output overwrites the trailing ", ".
1279 if (!columnListWithoutSizeColumns.empty())
1280 cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
1281 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
1282 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
1283
1284 // book the code to jit with the RLoopManager and trigger the event loop
1285 fLoopManager->ToJitExec(cacheCall.str());
1286 fLoopManager->Jit();
1287
1288 return resRDF;
1289 }
1290
1291 ////////////////////////////////////////////////////////////////////////////
1292 /// \brief Save selected columns in memory.
1293 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' at the beginning and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1294 /// \return a `RDataFrame` that wraps the cached dataset.
1295 ///
1296 /// The existing columns are matched against the regular expression. If the string provided
1297 /// is empty, all columns are selected. See the previous overloads for more information.
1298 RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "")
1299 {
1300 const auto definedColumns = fColRegister.GetNames();
1301 auto *tree = fLoopManager->GetTree();
1302 const auto treeBranchNames = tree != nullptr ? RDFInternal::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1303 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1304 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1305 ColumnNames_t dsColumnsWithoutSizeColumns;
1306 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1307 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1308 ColumnNames_t columnNames;
1309 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
1310 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1311 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1312 columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
1313 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
1314 return Cache(selectedColumns);
1315 }
1316
1317 ////////////////////////////////////////////////////////////////////////////
1318 /// \brief Save selected columns in memory.
1319 /// \param[in] columnList columns to be cached in memory.
1320 /// \return a `RDataFrame` that wraps the cached dataset.
1321 ///
1322 /// See the previous overloads for more information.
1323 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
1324 {
1325 ColumnNames_t selectedColumns(columnList);
1326 return Cache(selectedColumns);
1327 }
1328
1329 // clang-format off
1330 ////////////////////////////////////////////////////////////////////////////
1331 /// \brief Creates a node that filters entries based on range: [begin, end).
1332 /// \param[in] begin Initial entry number considered for this range.
1333 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1334 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
1335 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
1336 ///
1337 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
1338 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
1339 ///
1340 /// ### Example usage:
1341 /// ~~~{.cpp}
1342 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
1343 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
1344 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
1345 /// ~~~
1346 // clang-format on
1347 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
1348 {
1349 // check invariants
1350 if (stride == 0 || (end != 0 && end < begin))
1351 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
1352 CheckIMTDisabled("Range");
1353
1354 using Range_t = RDFDetail::RRange<Proxied>;
1355 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
1357 return tdf_r;
1358 }
1359
1360 // clang-format off
1361 ////////////////////////////////////////////////////////////////////////////
1362 /// \brief Creates a node that filters entries based on range.
1363 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1364 /// \return a node of the computation graph for which the range is defined.
1365 ///
1366 /// See the other Range overload for a detailed description.
1367 // clang-format on
1368 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
1369
1370 // clang-format off
1371 ////////////////////////////////////////////////////////////////////////////
1372 /// \brief Execute a user-defined function on each entry (*instant action*).
1373 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1374 /// \param[in] columns Names of the columns/branches in input to the user function.
1375 ///
1376 /// The callable `f` is invoked once per entry. This is an *instant action*:
1377 /// upon invocation, an event loop as well as execution of all scheduled actions
1378 /// is triggered.
1379 /// Users are responsible for the thread-safety of this callable when executing
1380 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
1381 ///
1382 /// ### Example usage:
1383 /// ~~~{.cpp}
1384 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
1385 /// ~~~
1386 // clang-format on
1387 template <typename F>
1388 void Foreach(F f, const ColumnNames_t &columns = {})
1389 {
1390 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
1391 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
1392 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
1393 }
1394
1395 // clang-format off
1396 ////////////////////////////////////////////////////////////////////////////
1397 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
1398 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1399 /// \param[in] columns Names of the columns/branches in input to the user function.
1400 ///
1401 /// Same as `Foreach`, but the user-defined function takes an extra
1402 /// `unsigned int` as its first parameter, the *processing slot index*.
1403 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
1404 /// for each thread of execution.
1405 /// This is meant as a helper in writing thread-safe `Foreach`
1406 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1407 /// The user-defined processing callable is able to follow different
1408 /// *streams of processing* indexed by the first parameter.
1409 /// `ForeachSlot` works just as well with single-thread execution: in that
1410 /// case `slot` will always be `0`.
1411 ///
1412 /// ### Example usage:
1413 /// ~~~{.cpp}
1414 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1415 /// ~~~
1416 // clang-format on
1417 template <typename F>
1418 void ForeachSlot(F f, const ColumnNames_t &columns = {})
1419 {
1421 constexpr auto nColumns = ColTypes_t::list_size;
1422
1423 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1424 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
1425
1426 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1428
1429 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister);
1430
1431 fLoopManager->Run();
1432 }
1433
1434 // clang-format off
1435 ////////////////////////////////////////////////////////////////////////////
1436 /// \brief Execute a user-defined reduce operation on the values of a column.
1437 /// \tparam F The type of the reduce callable. Automatically deduced.
1438 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1439 /// \param[in] f A callable with signature `T(T,T)`
1440 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1441 /// \return the reduced quantity wrapped in a ROOT::RDF::RResultPtr.
1442 ///
1443 /// A reduction takes two values of a column and merges them into one (e.g.
1444 /// by summing them, taking the maximum, etc). This action performs the
1445 /// specified reduction operation on all processed column values, returning
1446 /// a single value of the same type. The callable f must satisfy the general
1447 /// requirements of a *processing function* besides having signature `T(T,T)`
1448 /// where `T` is the type of column columnName.
1449 ///
1450 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1451 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1452 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1453 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1454 /// overload.
1455 ///
1456 /// ### Example usage:
1457 /// ~~~{.cpp}
1458 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1459 /// ~~~
1460 ///
1461 /// This action is *lazy*: upon invocation of this method the calculation is
1462 /// booked but not executed. Also see RResultPtr.
1463 // clang-format on
1464 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1465 RResultPtr<T> Reduce(F f, std::string_view columnName = "")
1466 {
1467 static_assert(
1468 std::is_default_constructible<T>::value,
1469 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
1470 return Reduce(std::move(f), columnName, T());
1471 }
1472
1473 ////////////////////////////////////////////////////////////////////////////
1474 /// \brief Execute a user-defined reduce operation on the values of a column.
1475 /// \tparam F The type of the reduce callable. Automatically deduced.
1476 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1477 /// \param[in] f A callable with signature `T(T,T)`
1478 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1479 /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
1480 /// \return the reduced quantity wrapped in a RResultPtr.
1481 ///
1482 /// ### Example usage:
1483 /// ~~~{.cpp}
1484 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1485 /// ~~~
1486 /// See the description of the first Reduce overload for more information.
1487 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1488 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1489 {
1490 return Aggregate(f, f, columnName, redIdentity);
1491 }
1492
1493 ////////////////////////////////////////////////////////////////////////////
1494 /// \brief Return the number of entries processed (*lazy action*).
1495 /// \return the number of entries wrapped in a RResultPtr.
1496 ///
1497 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1498 /// This action is *lazy*: upon invocation of this method the calculation is
1499 /// booked but not executed. Also see RResultPtr.
1500 ///
1501 /// ### Example usage:
1502 /// ~~~{.cpp}
1503 /// auto nEntriesAfterCuts = myFilteredDf.Count();
1504 /// ~~~
1505 ///
1507 {
1508 const auto nSlots = fLoopManager->GetNSlots();
1509 auto cSPtr = std::make_shared<ULong64_t>(0);
1510 using Helper_t = RDFInternal::CountHelper;
1512 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
1514 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1515 }
1516
1517 ////////////////////////////////////////////////////////////////////////////
1518 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1519 /// \tparam T The type of the column.
1520 /// \tparam COLL The type of collection used to store the values.
1521 /// \param[in] column The name of the column to collect the values of.
1522 /// \return the content of the selected column wrapped in a RResultPtr.
1523 ///
1524 /// The collection type to be specified for C-style array columns is `RVec<T>`:
1525 /// in this case the returned collection is a `std::vector<RVec<T>>`.
1526 /// ### Example usage:
1527 /// ~~~{.cpp}
1528 /// // In this case intCol is a std::vector<int>
1529 /// auto intCol = rdf.Take<int>("integerColumn");
1530 /// // Same content as above but in this case taken as a RVec<int>
1531 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1532 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1533 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1534 /// ~~~
1535 /// This action is *lazy*: upon invocation of this method the calculation is
1536 /// booked but not executed. Also see RResultPtr.
1537 template <typename T, typename COLL = std::vector<T>>
1538 RResultPtr<COLL> Take(std::string_view column = "")
1539 {
1540 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1541
1542 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1543 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1544
1545 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
1547 auto valuesPtr = std::make_shared<COLL>();
1548 const auto nSlots = fLoopManager->GetNSlots();
1549
1550 auto action =
1551 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister);
1552 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1553 }
1554
1555 ////////////////////////////////////////////////////////////////////////////
1556 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1557 /// \tparam V The type of the column used to fill the histogram.
1558 /// \param[in] model The returned histogram will be constructed using this as a model.
1559 /// \param[in] vName The name of the column that will fill the histogram.
1560 /// \return the monodimensional histogram wrapped in a RResultPtr.
1561 ///
1562 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1563 /// is filled with each one of the elements of the container. In case multiple columns of container type
1564 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1565 /// possibly different lengths between events).
1566 /// This action is *lazy*: upon invocation of this method the calculation is
1567 /// booked but not executed. Also see RResultPtr.
1568 ///
1569 /// ### Example usage:
1570 /// ~~~{.cpp}
1571 /// // Deduce column type (this invocation needs jitting internally)
1572 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1573 /// // Explicit column type
1574 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1575 /// ~~~
1576 ///
1577 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1578 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1579 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1580 template <typename V = RDFDetail::RInferredType>
1581 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1582 {
1583 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1584
1585 const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1586
1587 std::shared_ptr<::TH1D> h(nullptr);
1588 {
1589 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1590 h = model.GetHistogram();
1591 h->SetDirectory(nullptr);
1592 }
1593
1594 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1595 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
1596 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h);
1597 }
1598
1599 ////////////////////////////////////////////////////////////////////////////
1600 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1601 /// \tparam V The type of the column used to fill the histogram.
1602 /// \param[in] vName The name of the column that will fill the histogram.
1603 /// \return the monodimensional histogram wrapped in a RResultPtr.
1604 ///
1605 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1606 /// The "name" and "title" strings are built starting from the input column name.
1607 /// See the description of the first Histo1D() overload for more details.
1608 ///
1609 /// ### Example usage:
1610 /// ~~~{.cpp}
1611 /// // Deduce column type (this invocation needs jitting internally)
1612 /// auto myHist1 = myDf.Histo1D("myColumn");
1613 /// // Explicit column type
1614 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1615 /// ~~~
1616 template <typename V = RDFDetail::RInferredType>
1617 RResultPtr<::TH1D> Histo1D(std::string_view vName)
1618 {
1619 const auto h_name = std::string(vName);
1620 const auto h_title = h_name + ";" + h_name + ";count";
1621 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1622 }
1623
1624 ////////////////////////////////////////////////////////////////////////////
1625 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1626 /// \tparam V The type of the column used to fill the histogram.
1627 /// \tparam W The type of the column used as weights.
1628 /// \param[in] model The returned histogram will be constructed using this as a model.
1629 /// \param[in] vName The name of the column that will fill the histogram.
1630 /// \param[in] wName The name of the column that will provide the weights.
1631 /// \return the monodimensional histogram wrapped in a RResultPtr.
1632 ///
1633 /// See the description of the first Histo1D() overload for more details.
1634 ///
1635 /// ### Example usage:
1636 /// ~~~{.cpp}
1637 /// // Deduce column type (this invocation needs jitting internally)
1638 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1639 /// // Explicit column type
1640 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1641 /// ~~~
1642 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1643 RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
1644 {
1645 const std::vector<std::string_view> columnViews = {vName, wName};
1646 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1647 ? ColumnNames_t()
1648 : ColumnNames_t(columnViews.begin(), columnViews.end());
1649 std::shared_ptr<::TH1D> h(nullptr);
1650 {
1651 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1652 h = model.GetHistogram();
1653 }
1654 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h);
1655 }
1656
1657 ////////////////////////////////////////////////////////////////////////////
1658 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1659 /// \tparam V The type of the column used to fill the histogram.
1660 /// \tparam W The type of the column used as weights.
1661 /// \param[in] vName The name of the column that will fill the histogram.
1662 /// \param[in] wName The name of the column that will provide the weights.
1663 /// \return the monodimensional histogram wrapped in a RResultPtr.
1664 ///
1665 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1666 /// The "name" and "title" strings are built starting from the input column names.
1667 /// See the description of the first Histo1D() overload for more details.
1668 ///
1669 /// ### Example usage:
1670 /// ~~~{.cpp}
1671 /// // Deduce column types (this invocation needs jitting internally)
1672 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1673 /// // Explicit column types
1674 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1675 /// ~~~
1676 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1677 RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName)
1678 {
1679 // We build name and title based on the value and weight column names
1680 std::string str_vName{vName};
1681 std::string str_wName{wName};
1682 const auto h_name = str_vName + "_weighted_" + str_wName;
1683 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1684 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1685 }
1686
1687 ////////////////////////////////////////////////////////////////////////////
1688 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1689 /// \tparam V The type of the column used to fill the histogram.
1690 /// \tparam W The type of the column used as weights.
1691 /// \param[in] model The returned histogram will be constructed using this as a model.
1692 /// \return the monodimensional histogram wrapped in a RResultPtr.
1693 ///
1694 /// This overload will use the first two default columns as column names.
1695 /// See the description of the first Histo1D() overload for more details.
1696 template <typename V, typename W>
1697 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1698 {
1699 return Histo1D<V, W>(model, "", "");
1700 }
1701
1702 ////////////////////////////////////////////////////////////////////////////
1703 /// \brief Fill and return a two-dimensional histogram (*lazy action*).
1704 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1705 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1706 /// \param[in] model The returned histogram will be constructed using this as a model.
1707 /// \param[in] v1Name The name of the column that will fill the x axis.
1708 /// \param[in] v2Name The name of the column that will fill the y axis.
1709 /// \return the bidimensional histogram wrapped in a RResultPtr.
1710 ///
1711 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1712 /// is filled with each one of the elements of the container. In case multiple columns of container type
1713 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1714 /// possibly different lengths between events).
1715 /// This action is *lazy*: upon invocation of this method the calculation is
1716 /// booked but not executed. Also see RResultPtr.
1717 ///
1718 /// ### Example usage:
1719 /// ~~~{.cpp}
1720 /// // Deduce column types (this invocation needs jitting internally)
1721 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1722 /// // Explicit column types
1723 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1724 /// ~~~
1725 ///
1726 ///
1727 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1728 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1729 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1730 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1731 RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1732 {
1733 std::shared_ptr<::TH2D> h(nullptr);
1734 {
1735 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1736 h = model.GetHistogram();
1737 }
1738 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1739 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1740 }
1741 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1742 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1743 ? ColumnNames_t()
1744 : ColumnNames_t(columnViews.begin(), columnViews.end());
1745 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h);
1746 }
1747
1748 ////////////////////////////////////////////////////////////////////////////
1749 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
1750 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1751 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1752 /// \tparam W The type of the column used for the weights of the histogram.
1753 /// \param[in] model The returned histogram will be constructed using this as a model.
1754 /// \param[in] v1Name The name of the column that will fill the x axis.
1755 /// \param[in] v2Name The name of the column that will fill the y axis.
1756 /// \param[in] wName The name of the column that will provide the weights.
1757 /// \return the bidimensional histogram wrapped in a RResultPtr.
1758 ///
1759 /// This action is *lazy*: upon invocation of this method the calculation is
1760 /// booked but not executed. Also see RResultPtr.
1761 ///
1762 /// ### Example usage:
1763 /// ~~~{.cpp}
1764 /// // Deduce column types (this invocation needs jitting internally)
1765 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1766 /// // Explicit column types
1767 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1768 /// ~~~
1769 ///
1770 /// See the documentation of the first Histo2D() overload for more details.
1771 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1772 typename W = RDFDetail::RInferredType>
1774 Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
1775 {
1776 std::shared_ptr<::TH2D> h(nullptr);
1777 {
1778 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1779 h = model.GetHistogram();
1780 }
1781 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1782 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1783 }
1784 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1785 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1786 ? ColumnNames_t()
1787 : ColumnNames_t(columnViews.begin(), columnViews.end());
1788 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h);
1789 }
1790
1791 template <typename V1, typename V2, typename W>
1793 {
1794 return Histo2D<V1, V2, W>(model, "", "", "");
1795 }
1796
1797 ////////////////////////////////////////////////////////////////////////////
1798 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1799 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1800 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1801 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1802 /// \param[in] model The returned histogram will be constructed using this as a model.
1803 /// \param[in] v1Name The name of the column that will fill the x axis.
1804 /// \param[in] v2Name The name of the column that will fill the y axis.
1805 /// \param[in] v3Name The name of the column that will fill the z axis.
1806 /// \return the tridimensional histogram wrapped in a RResultPtr.
1807 ///
1808 /// This action is *lazy*: upon invocation of this method the calculation is
1809 /// booked but not executed. Also see RResultPtr.
1810 ///
1811 /// ### Example usage:
1812 /// ~~~{.cpp}
1813 /// // Deduce column types (this invocation needs jitting internally)
1814 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1815 /// "myValueX", "myValueY", "myValueZ");
1816 /// // Explicit column types
1817 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1818 /// "myValueX", "myValueY", "myValueZ");
1819 /// ~~~
1820 ///
1821 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1822 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1823 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1824 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1825 typename V3 = RDFDetail::RInferredType>
1826 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "",
1827 std::string_view v3Name = "")
1828 {
1829 std::shared_ptr<::TH3D> h(nullptr);
1830 {
1831 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1832 h = model.GetHistogram();
1833 }
1834 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1835 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1836 }
1837 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1838 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1839 ? ColumnNames_t()
1840 : ColumnNames_t(columnViews.begin(), columnViews.end());
1841 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h);
1842 }
1843
1844 ////////////////////////////////////////////////////////////////////////////
1845 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1846 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1847 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1848 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1849 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1850 /// \param[in] model The returned histogram will be constructed using this as a model.
1851 /// \param[in] v1Name The name of the column that will fill the x axis.
1852 /// \param[in] v2Name The name of the column that will fill the y axis.
1853 /// \param[in] v3Name The name of the column that will fill the z axis.
1854 /// \param[in] wName The name of the column that will provide the weights.
1855 /// \return the tridimensional histogram wrapped in a RResultPtr.
1856 ///
1857 /// This action is *lazy*: upon invocation of this method the calculation is
1858 /// booked but not executed. Also see RResultPtr.
1859 ///
1860 /// ### Example usage:
1861 /// ~~~{.cpp}
1862 /// // Deduce column types (this invocation needs jitting internally)
1863 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1864 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1865 /// // Explicit column types
1866 /// using d_t = double;
1867 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1868 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1869 /// ~~~
1870 ///
1871 ///
1872 /// See the documentation of the first Histo2D() overload for more details.
1873 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1874 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1875 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name,
1876 std::string_view v3Name, std::string_view wName)
1877 {
1878 std::shared_ptr<::TH3D> h(nullptr);
1879 {
1880 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1881 h = model.GetHistogram();
1882 }
1883 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1884 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1885 }
1886 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1887 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1888 ? ColumnNames_t()
1889 : ColumnNames_t(columnViews.begin(), columnViews.end());
1890 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h);
1891 }
1892
1893 template <typename V1, typename V2, typename V3, typename W>
1895 {
1896 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1897 }
1898
1899 ////////////////////////////////////////////////////////////////////////////
1900 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1901 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
1902 /// present.
1903 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
1904 /// object.
1905 /// \param[in] model The returned histogram will be constructed using this as a model.
1906 /// \param[in] columnList
1907 /// A list containing the names of the columns that will be passed when calling `Fill`.
1908 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
1909 /// \return the N-dimensional histogram wrapped in a RResultPtr.
1910 ///
1911 /// This action is *lazy*: upon invocation of this method the calculation is
1912 /// booked but not executed. See RResultPtr documentation.
1913 ///
1914 /// ### Example usage:
1915 /// ~~~{.cpp}
1916 /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
1917 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
1918 /// {"col0", "col1", "col2", "col3"});
1919 /// ~~~
1920 ///
1921 template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
1922 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
1923 {
1924 std::shared_ptr<::THnD> h(nullptr);
1925 {
1926 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1927 h = model.GetHistogram();
1928
1929 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
1930 h->Sumw2();
1931 } else if (int(columnList.size()) != h->GetNdimensions()) {
1932 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
1933 }
1934 }
1935 return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h);
1936 }
1937
1938 ////////////////////////////////////////////////////////////////////////////
1939 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1940 /// \param[in] model The returned histogram will be constructed using this as a model.
1941 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1942 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
1943 /// \return the N-dimensional histogram wrapped in a RResultPtr.
1944 ///
1945 /// This action is *lazy*: upon invocation of this method the calculation is
1946 /// booked but not executed. Also see RResultPtr.
1947 ///
1948 /// ### Example usage:
1949 /// ~~~{.cpp}
1950 /// auto myFilledObj = myDf.HistoND({"name","title", 4,
1951 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
1952 /// {"col0", "col1", "col2", "col3"});
1953 /// ~~~
1954 ///
1955 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
1956 {
1957 std::shared_ptr<::THnD> h(nullptr);
1958 {
1959 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1960 h = model.GetHistogram();
1961
1962 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
1963 h->Sumw2();
1964 } else if (int(columnList.size()) != h->GetNdimensions()) {
1965 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
1966 }
1967 }
1968 return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h,
1969 columnList.size());
1970 }
1971
1972 ////////////////////////////////////////////////////////////////////////////
1973 /// \brief Fill and return a graph (*lazy action*).
1974 /// \tparam V1 The type of the column used to fill the x axis of the graph.
1975 /// \tparam V2 The type of the column used to fill the y axis of the graph.
1976 /// \param[in] v1Name The name of the column that will fill the x axis.
1977 /// \param[in] v2Name The name of the column that will fill the y axis.
1978 /// \return the graph wrapped in a RResultPtr.
1979 ///
1980 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1981 /// is filled with each one of the elements of the container.
1982 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1983 /// If the Graph has to be drawn, it is suggested to the user to sort it on the x before printing.
1984 /// A name and a title to the graph is given based on the input column names.
1985 ///
1986 /// This action is *lazy*: upon invocation of this method the calculation is
1987 /// booked but not executed. Also see RResultPtr.
1988 ///
1989 /// ### Example usage:
1990 /// ~~~{.cpp}
1991 /// // Deduce column types (this invocation needs jitting internally)
1992 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1993 /// // Explicit column types
1994 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1995 /// ~~~
1996 ///
1997 /// \note Differently from other ROOT interfaces, the returned graph is not associated to gDirectory
1998 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1999 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2000 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2001 RResultPtr<::TGraph> Graph(std::string_view v1Name = "", std::string_view v2Name = "")
2002 {
2003 auto graph = std::make_shared<::TGraph>();
2004 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2005 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2006 ? ColumnNames_t()
2007 : ColumnNames_t(columnViews.begin(), columnViews.end());
2008
2009 const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
2010
2011 // We build a default name and title based on the input columns
2012 if (!(validatedColumns[0].empty() && validatedColumns[1].empty())) {
2013 const auto g_name = std::string(v1Name) + "_vs_" + std::string(v2Name);
2014 const auto g_title = std::string(v1Name) + " vs " + std::string(v2Name);
2015 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2016 graph->GetXaxis()->SetTitle(std::string(v1Name).c_str());
2017 graph->GetYaxis()->SetTitle(std::string(v2Name).c_str());
2018 }
2019
2020 return CreateAction<RDFInternal::ActionTags::Graph, V1, V2>(validatedColumns, graph, graph);
2021 }
2022
2023 ////////////////////////////////////////////////////////////////////////////
2024 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2025 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2026 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2027 /// \param[in] model The model to be considered to build the new return value.
2028 /// \param[in] v1Name The name of the column that will fill the x axis.
2029 /// \param[in] v2Name The name of the column that will fill the y axis.
2030 /// \return the monodimensional profile wrapped in a RResultPtr.
2031 ///
2032 /// This action is *lazy*: upon invocation of this method the calculation is
2033 /// booked but not executed. Also see RResultPtr.
2034 ///
2035 /// ### Example usage:
2036 /// ~~~{.cpp}
2037 /// // Deduce column types (this invocation needs jitting internally)
2038 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2039 /// // Explicit column types
2040 /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2041 /// ~~~
2042 ///
2043 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2044 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2045 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2046 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2048 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2049 {
2050 std::shared_ptr<::TProfile> h(nullptr);
2051 {
2052 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2053 h = model.GetProfile();
2054 }
2055
2056 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2057 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
2058 }
2059 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2060 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2061 ? ColumnNames_t()
2062 : ColumnNames_t(columnViews.begin(), columnViews.end());
2063 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h);
2064 }
2065
2066 ////////////////////////////////////////////////////////////////////////////
2067 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2068 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2069 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2070 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
2071 /// \param[in] model The model to be considered to build the new return value.
2072 /// \param[in] v1Name The name of the column that will fill the x axis.
2073 /// \param[in] v2Name The name of the column that will fill the y axis.
2074 /// \param[in] wName The name of the column that will provide the weights.
2075 /// \return the monodimensional profile wrapped in a RResultPtr.
2076 ///
2077 /// This action is *lazy*: upon invocation of this method the calculation is
2078 /// booked but not executed. Also see RResultPtr.
2079 ///
2080 /// ### Example usage:
2081 /// ~~~{.cpp}
2082 /// // Deduce column types (this invocation needs jitting internally)
2083 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
2084 /// // Explicit column types
2085 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
2086 /// "xValues", "yValues", "weight");
2087 /// ~~~
2088 ///
2089 /// See the first Profile1D() overload for more details.
2090 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2091 typename W = RDFDetail::RInferredType>
2093 Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2094 {
2095 std::shared_ptr<::TProfile> h(nullptr);
2096 {
2097 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2098 h = model.GetProfile();
2099 }
2100
2101 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2102 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
2103 }
2104 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2105 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2106 ? ColumnNames_t()
2107 : ColumnNames_t(columnViews.begin(), columnViews.end());
2108 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h);
2109 }
2110
2111 ////////////////////////////////////////////////////////////////////////////
2112 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2113 /// See the first Profile1D() overload for more details.
2114 template <typename V1, typename V2, typename W>
2116 {
2117 return Profile1D<V1, V2, W>(model, "", "", "");
2118 }
2119
2120 ////////////////////////////////////////////////////////////////////////////
2121 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2122 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2123 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2124 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2125 /// \param[in] model The returned profile will be constructed using this as a model.
2126 /// \param[in] v1Name The name of the column that will fill the x axis.
2127 /// \param[in] v2Name The name of the column that will fill the y axis.
2128 /// \param[in] v3Name The name of the column that will fill the z axis.
2129 /// \return the bidimensional profile wrapped in a RResultPtr.
2130 ///
2131 /// This action is *lazy*: upon invocation of this method the calculation is
2132 /// booked but not executed. Also see RResultPtr.
2133 ///
2134 /// ### Example usage:
2135 /// ~~~{.cpp}
2136 /// // Deduce column types (this invocation needs jitting internally)
2137 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2138 /// "xValues", "yValues", "zValues");
2139 /// // Explicit column types
2140 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2141 /// "xValues", "yValues", "zValues");
2142 /// ~~~
2143 ///
2144 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2145 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2146 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2147 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2148 typename V3 = RDFDetail::RInferredType>
2149 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "",
2150 std::string_view v2Name = "", std::string_view v3Name = "")
2151 {
2152 std::shared_ptr<::TProfile2D> h(nullptr);
2153 {
2154 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2155 h = model.GetProfile();
2156 }
2157
2158 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2159 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2160 }
2161 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2162 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2163 ? ColumnNames_t()
2164 : ColumnNames_t(columnViews.begin(), columnViews.end());
2165 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h);
2166 }
2167
2168 ////////////////////////////////////////////////////////////////////////////
2169 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2170 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2171 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2172 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2173 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2174 /// \param[in] model The returned histogram will be constructed using this as a model.
2175 /// \param[in] v1Name The name of the column that will fill the x axis.
2176 /// \param[in] v2Name The name of the column that will fill the y axis.
2177 /// \param[in] v3Name The name of the column that will fill the z axis.
2178 /// \param[in] wName The name of the column that will provide the weights.
2179 /// \return the bidimensional profile wrapped in a RResultPtr.
2180 ///
2181 /// This action is *lazy*: upon invocation of this method the calculation is
2182 /// booked but not executed. Also see RResultPtr.
2183 ///
2184 /// ### Example usage:
2185 /// ~~~{.cpp}
2186 /// // Deduce column types (this invocation needs jitting internally)
2187 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2188 /// "xValues", "yValues", "zValues", "weight");
2189 /// // Explicit column types
2190 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2191 /// "xValues", "yValues", "zValues", "weight");
2192 /// ~~~
2193 ///
2194 /// See the first Profile2D() overload for more details.
2195 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2196 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2197 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name,
2198 std::string_view v3Name, std::string_view wName)
2199 {
2200 std::shared_ptr<::TProfile2D> h(nullptr);
2201 {
2202 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2203 h = model.GetProfile();
2204 }
2205
2206 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2207 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2208 }
2209 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2210 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2211 ? ColumnNames_t()
2212 : ColumnNames_t(columnViews.begin(), columnViews.end());
2213 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h);
2214 }
2215
2216 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2217 /// See the first Profile2D() overload for more details.
2218 template <typename V1, typename V2, typename V3, typename W>
2220 {
2221 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
2222 }
2223
2224 ////////////////////////////////////////////////////////////////////////////
2225 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
2226 ///
2227 /// Type T must provide at least:
2228 /// - a copy-constructor
2229 /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
2230 /// (these types can also be passed as template parameters to this method)
2231 /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
2232 /// objects passed as argument into the object on which `Merge` was called (an analogous of TH1::Merge). Note that
2233 /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
2234 /// the TCollection*).
2235 ///
2236 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
2237 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
2238 /// \tparam T The type of the object to fill. Automatically deduced.
2239 /// \param[in] model The model to be considered to build the new return value.
2240 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2241 /// \return the filled object wrapped in a RResultPtr.
2242 ///
2243 /// The user gives up ownership of the model object.
2244 /// The list of column names to be used for filling must always be specified.
2245 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
2246 /// Also see RResultPtr.
2247 ///
2248 /// ### Example usage:
2249 /// ~~~{.cpp}
2250 /// MyClass obj;
2251 /// // Deduce column types (this invocation needs jitting internally, and in this case
2252 /// // MyClass needs to be known to the interpreter)
2253 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
2254 /// // explicit column types
2255 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
2256 /// ~~~
2257 ///
2258 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
2259 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
2260 {
2261 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
2262 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
2263 throw std::runtime_error("The absence of axes limits is not supported yet.");
2264 }
2265 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h,
2266 columnList.size());
2267 }
2268
2269 ////////////////////////////////////////////////////////////////////////////
2270 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2271 ///
2272 /// \tparam V The type of the value column
2273 /// \param[in] value The name of the column with the values to fill the statistics with.
2274 /// \return the filled TStatistic object wrapped in a RResultPtr.
2275 ///
2276 /// ### Example usage:
2277 /// ~~~{.cpp}
2278 /// // Deduce column type (this invocation needs jitting internally)
2279 /// auto stats0 = myDf.Stats("values");
2280 /// // Explicit column type
2281 /// auto stats1 = myDf.Stats<float>("values");
2282 /// ~~~
2283 ///
2284 template <typename V = RDFDetail::RInferredType>
2285 RResultPtr<TStatistic> Stats(std::string_view value = "")
2286 {
2287 ColumnNames_t columns;
2288 if (!value.empty()) {
2289 columns.emplace_back(std::string(value));
2290 }
2291 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2292 if (std::is_same<V, RDFDetail::RInferredType>::value) {
2293 return Fill(TStatistic(), validColumnNames);
2294 } else {
2295 return Fill<V>(TStatistic(), validColumnNames);
2296 }
2297 }
2298
2299 ////////////////////////////////////////////////////////////////////////////
2300 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2301 ///
2302 /// \tparam V The type of the value column
2303 /// \tparam W The type of the weight column
2304 /// \param[in] value The name of the column with the values to fill the statistics with.
2305 /// \param[in] weight The name of the column with the weights to fill the statistics with.
2306 /// \return the filled TStatistic object wrapped in a RResultPtr.
2307 ///
2308 /// ### Example usage:
2309 /// ~~~{.cpp}
2310 /// // Deduce column types (this invocation needs jitting internally)
2311 /// auto stats0 = myDf.Stats("values", "weights");
2312 /// // Explicit column types
2313 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
2314 /// ~~~
2315 ///
2316 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2317 RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight)
2318 {
2319 ColumnNames_t columns{std::string(value), std::string(weight)};
2320 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2321 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2322 const auto validColumnNames = GetValidatedColumnNames(2, columns);
2323 // We have 3 cases:
2324 // 1. Both types are inferred: we use Fill and let the jit kick in.
2325 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
2326 // 3. Both types are explicit: we invoke the fully compiled Fill method.
2327 if (vIsInferred && wIsInferred) {
2328 return Fill(TStatistic(), validColumnNames);
2329 } else if (vIsInferred != wIsInferred) {
2330 std::string error("The ");
2331 error += vIsInferred ? "value " : "weight ";
2332 error += "column type is explicit, while the ";
2333 error += vIsInferred ? "weight " : "value ";
2334 error += " is specified to be inferred. This case is not supported: please specify both types or none.";
2335 throw std::runtime_error(error);
2336 } else {
2337 return Fill<V, W>(TStatistic(), validColumnNames);
2338 }
2339 }
2340
2341 ////////////////////////////////////////////////////////////////////////////
2342 /// \brief Return the minimum of processed column values (*lazy action*).
2343 /// \tparam T The type of the branch/column.
2344 /// \param[in] columnName The name of the branch/column to be treated.
2345 /// \return the minimum value of the selected column wrapped in a RResultPtr.
2346 ///
2347 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2348 /// template specialization of this method.
2349 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2350 ///
2351 /// This action is *lazy*: upon invocation of this method the calculation is
2352 /// booked but not executed. Also see RResultPtr.
2353 ///
2354 /// ### Example usage:
2355 /// ~~~{.cpp}
2356 /// // Deduce column type (this invocation needs jitting internally)
2357 /// auto minVal0 = myDf.Min("values");
2358 /// // Explicit column type
2359 /// auto minVal1 = myDf.Min<double>("values");
2360 /// ~~~
2361 ///
2362 template <typename T = RDFDetail::RInferredType>
2363 RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "")
2364 {
2365 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2366 using RetType_t = RDFDetail::MinReturnType_t<T>;
2367 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
2368 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV);
2369 }
2370
2371 ////////////////////////////////////////////////////////////////////////////
2372 /// \brief Return the maximum of processed column values (*lazy action*).
2373 /// \tparam T The type of the branch/column.
2374 /// \param[in] columnName The name of the branch/column to be treated.
2375 /// \return the maximum value of the selected column wrapped in a RResultPtr.
2376 ///
2377 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2378 /// template specialization of this method.
2379 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2380 ///
2381 /// This action is *lazy*: upon invocation of this method the calculation is
2382 /// booked but not executed. Also see RResultPtr.
2383 ///
2384 /// ### Example usage:
2385 /// ~~~{.cpp}
2386 /// // Deduce column type (this invocation needs jitting internally)
2387 /// auto maxVal0 = myDf.Max("values");
2388 /// // Explicit column type
2389 /// auto maxVal1 = myDf.Max<double>("values");
2390 /// ~~~
2391 ///
2392 template <typename T = RDFDetail::RInferredType>
2393 RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "")
2394 {
2395 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2396 using RetType_t = RDFDetail::MaxReturnType_t<T>;
2397 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
2398 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV);
2399 }
2400
2401 ////////////////////////////////////////////////////////////////////////////
2402 /// \brief Return the mean of processed column values (*lazy action*).
2403 /// \tparam T The type of the branch/column.
2404 /// \param[in] columnName The name of the branch/column to be treated.
2405 /// \return the mean value of the selected column wrapped in a RResultPtr.
2406 ///
2407 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2408 /// template specialization of this method.
2409 ///
2410 /// This action is *lazy*: upon invocation of this method the calculation is
2411 /// booked but not executed. Also see RResultPtr.
2412 ///
2413 /// ### Example usage:
2414 /// ~~~{.cpp}
2415 /// // Deduce column type (this invocation needs jitting internally)
2416 /// auto meanVal0 = myDf.Mean("values");
2417 /// // Explicit column type
2418 /// auto meanVal1 = myDf.Mean<double>("values");
2419 /// ~~~
2420 ///
2421 template <typename T = RDFDetail::RInferredType>
2422 RResultPtr<double> Mean(std::string_view columnName = "")
2423 {
2424 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2425 auto meanV = std::make_shared<double>(0);
2426 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV);
2427 }
2428
2429 ////////////////////////////////////////////////////////////////////////////
2430 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
2431 /// \tparam T The type of the branch/column.
2432 /// \param[in] columnName The name of the branch/column to be treated.
2433 /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
2434 ///
2435 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2436 /// template specialization of this method.
2437 ///
2438 /// This action is *lazy*: upon invocation of this method the calculation is
2439 /// booked but not executed. Also see RResultPtr.
2440 ///
2441 /// ### Example usage:
2442 /// ~~~{.cpp}
2443 /// // Deduce column type (this invocation needs jitting internally)
2444 /// auto stdDev0 = myDf.StdDev("values");
2445 /// // Explicit column type
2446 /// auto stdDev1 = myDf.StdDev<double>("values");
2447 /// ~~~
2448 ///
2449 template <typename T = RDFDetail::RInferredType>
2450 RResultPtr<double> StdDev(std::string_view columnName = "")
2451 {
2452 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2453 auto stdDeviationV = std::make_shared<double>(0);
2454 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV);
2455 }
2456
2457 // clang-format off
2458 ////////////////////////////////////////////////////////////////////////////
2459 /// \brief Return the sum of processed column values (*lazy action*).
2460 /// \tparam T The type of the branch/column.
2461 /// \param[in] columnName The name of the branch/column.
2462 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
2463 /// \return the sum of the selected column wrapped in a RResultPtr.
2464 ///
2465 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2466 /// template specialization of this method.
2467 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2468 ///
2469 /// This action is *lazy*: upon invocation of this method the calculation is
2470 /// booked but not executed. Also see RResultPtr.
2471 ///
2472 /// ### Example usage:
2473 /// ~~~{.cpp}
2474 /// // Deduce column type (this invocation needs jitting internally)
2475 /// auto sum0 = myDf.Sum("values");
2476 /// // Explicit column type
2477 /// auto sum1 = myDf.Sum<double>("values");
2478 /// ~~~
2479 ///
2480 template <typename T = RDFDetail::RInferredType> // NOTE(review): the return-type line (2481) is absent from this listing
2482 Sum(std::string_view columnName = "",
2483 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2484 {
2485 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); // empty list when no column name was passed
2486 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue); // accumulator seeded with a copy of initValue
2487 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV); // sumV is passed as both the result and the helper argument
2488 }
2489 // clang-format on
2490
2491 ////////////////////////////////////////////////////////////////////////////
2492 /// \brief Gather filtering statistics.
2493 /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2494 ///
2495 /// Calling `Report` on the main `RDataFrame` object gathers stats for
2496 /// all named filters in the call graph. Calling this method on a
2497 /// stored chain state (i.e. a graph node different from the first) gathers
2498 /// the stats for all named filters in the chain section between the original
2499 /// `RDataFrame` and that node (included). Stats are gathered in the same
2500 /// order as the named filters have been added to the graph.
2501 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
2502 /// effects cuts had.
2503 ///
2504 /// This action is *lazy*: upon invocation of
2505 /// this method the calculation is booked but not executed. See RResultPtr
2506 /// documentation.
2507 ///
2508 /// ### Example usage:
2509 /// ~~~{.cpp}
2510 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
2511 /// auto cutReport = filtered.Report();
2512 /// cutReport->Print();
2513 /// ~~~
2514 ///
2516 { // NOTE(review): the declaration line (2515) and lines 2528/2531 are absent from this listing
2517 bool returnEmptyReport = false; // only flipped by the RLoopManager check below
2518 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
2519 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
2520 // certainly does not contain named filters.
2521 // The number 4 takes into account the implicit columns for entry and slot number
2522 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
2523 if (std::is_same<Proxied, RLoopManager>::value && fColRegister.GetNames().size() > 4)
2524 returnEmptyReport = true;
2525
2526 auto rep = std::make_shared<RCutFlowReport>(); // given to the helper and also wrapped in the returned RResultPtr
2527 using Helper_t = RDFInternal::ReportHelper<Proxied>;
2529
2530 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
2532
2533 return MakeResultPtr(rep, *fLoopManager, std::move(action)); // ownership of the action is moved into MakeResultPtr
2534 }
2535
2536 /////////////////////////////////////////////////////////////////////////////
2537 /// \brief Returns the names of the available columns.
2538 /// \return the container of column names.
2539 ///
2540 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2541 ///
2542 /// ### Example usage:
2543 /// ~~~{.cpp}
2544 /// auto colNames = d.GetColumnNames();
2545 /// // Print columns' names
2546 /// for (auto &&colName : colNames) std::cout << colName << std::endl;
2547 /// ~~~
2548 ///
2550 { // NOTE(review): the declaration line (2549) is absent from this listing
2551 // A set is used because the same name can come from several sources (redefined columns, tree branches, data source)
2552 std::unordered_set<std::string> allColumns;
2553
2554 auto addIfNotInternal = [&allColumns](std::string_view colName) {
2555 if (!RDFInternal::IsInternalColumn(colName))
2556 allColumns.emplace(colName);
2557 };
2558
2559 auto definedColumns = fColRegister.GetNames(); // names known to the column register
2560
2561 std::for_each(definedColumns.begin(), definedColumns.end(), addIfNotInternal);
2562
2563 auto tree = fLoopManager->GetTree();
2564 if (tree) { // branch names are only collected when a tree is attached
2565 for (const auto &bName : RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false))
2566 allColumns.emplace(bName);
2567 }
2568
2569 if (fDataSource) {
2570 for (const auto &s : fDataSource->GetColumnNames()) {
2571 if (s.rfind("R_rdf_sizeof", 0) != 0) // skip data-source names that start with "R_rdf_sizeof"
2572 allColumns.emplace(s);
2573 }
2574 }
2575
2576 ColumnNames_t ret(allColumns.begin(), allColumns.end());
2577 std::sort(ret.begin(), ret.end()); // unordered_set has no defined order, so the names are returned sorted
2578 return ret;
2579 }
2580
2581 /////////////////////////////////////////////////////////////////////////////
2582 /// \brief Return the type of a given column as a string.
2583 /// \return the type of the required column.
2584 ///
2585 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2586 ///
2587 /// ### Example usage:
2588 /// ~~~{.cpp}
2589 /// auto colType = d.GetColumnType("columnName");
2590 /// // Print column type
2591 /// std::cout << "Column columnName has type " << colType << std::endl;
2592 /// ~~~
2593 ///
2594 std::string GetColumnType(std::string_view column)
2595 {
2596 const auto col = fColRegister.ResolveAlias(std::string(column)); // presumably maps an alias to the column it stands for; verify in RColumnRegister
2597
2598 RDFDetail::RDefineBase *define = fColRegister.HasName(col) ? fColRegister.GetColumns().at(col).get() : nullptr; // nullptr when the register has no entry named col
2599
2600 const bool convertVector2RVec = true; // NOTE(review): line 2601, where the return statement begins, is absent from this listing
2602 convertVector2RVec);
2603 }
2604
2605 /////////////////////////////////////////////////////////////////////////////
2606 /// \brief Return information about the dataframe.
2607 /// \return information about the dataframe as RDFDescription object
2608 ///
2609 /// This convenience function describes the dataframe and combines the following information:
2610 /// - Number of event loops run, see GetNRuns()
2611 /// - Number of total and defined columns, see GetColumnNames() and GetDefinedColumnNames()
2612 /// - Column names, see GetColumnNames()
2613 /// - Column types, see GetColumnType()
2614 /// - Number of processing slots, see GetNSlots()
2615 ///
2616 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2617 /// The result is dependent on the node from which this method is called, e.g. the list of
2618 /// defined columns returned by GetDefinedColumnNames().
2619 ///
2620 /// Please note that this is a convenience feature and the layout of the output can be subject
2621 /// to change and should be parsed via RDFDescription methods.
2622 ///
2623 /// ### Example usage:
2624 /// ~~~{.cpp}
2625 /// RDataFrame df(10);
2626 /// auto df2 = df.Define("x", "1.f").Define("s", "\"myStr\"");
2627 /// // Describe the dataframe
2628 /// df2.Describe().Print();
2629 /// df2.Describe().Print(/*shortFormat=*/true);
2630 /// std::cout << df2.Describe().AsString() << std::endl;
2631 /// std::cout << df2.Describe().AsString(/*shortFormat=*/true) << std::endl;
2632 /// ~~~
2633 ///
2635 {
2636 // Build set of defined column names to find later in all column names
2637 // the defined columns more efficiently
2638 const auto columnNames = GetColumnNames();
2639 std::set<std::string> definedColumnNamesSet;
2640 for (const auto &name : GetDefinedColumnNames())
2641 definedColumnNamesSet.insert(name);
2642
2643 // Get information for the metadata table
2644 const std::vector<std::string> metadataProperties = {"Columns in total", "Columns from defines",
2645 "Event loops run", "Processing slots"};
2646 const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
2647 std::to_string(definedColumnNamesSet.size()),
2648 std::to_string(GetNRuns()), std::to_string(GetNSlots())};
2649
2650 // Set header for metadata table
2651 const auto columnWidthProperties = RDFInternal::GetColumnWidth(metadataProperties);
2652 // The column width of the values is required to make right-bound numbers and is equal to the larger of
2653 // the length of the string "Value" and the length of the longest value. Strings are compared by length:
2654 // the default (lexicographic) ordering would pick e.g. "9" over "100000" and under-estimate the width.
2655 const auto columnWidthValues = std::max(std::max_element(metadataValues.begin(), metadataValues.end(), [](const std::string &a, const std::string &b) { return a.size() < b.size(); })->size(), static_cast<std::size_t>(5u));
2656 std::stringstream ss;
2657 ss << std::left << std::setw(columnWidthProperties) << "Property" << std::setw(columnWidthValues) << "Value\n"
2658 << std::setw(columnWidthProperties) << "--------" << std::setw(columnWidthValues) << "-----\n";
2659
2660 // Build metadata table
2661 // All numbers should be bound to the right and strings bound to the left.
2662 for (auto i = 0u; i < metadataProperties.size(); i++) {
2663 ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
2664 << std::setw(columnWidthValues) << metadataValues[i] << '\n';
2665 }
2666 ss << '\n'; // put space between this and the next table
2667
2668 // Set header for columns table
2669 const auto columnWidthNames = RDFInternal::GetColumnWidth(columnNames);
2670 const auto columnTypes = GetColumnTypeNamesList(columnNames);
2671 const auto columnWidthTypes = RDFInternal::GetColumnWidth(columnTypes);
2672 ss << std::left << std::setw(columnWidthNames) << "Column" << std::setw(columnWidthTypes) << "Type"
2673 << "Origin\n"
2674 << std::setw(columnWidthNames) << "------" << std::setw(columnWidthTypes) << "----"
2675 << "------\n";
2676
2677 // Build columns table: one row per column, tagged "Define" if it is among the defined columns, "Dataset" otherwise
2678 const auto nCols = columnNames.size();
2679 for (auto i = 0u; i < nCols; i++) {
2680 auto origin = "Dataset";
2681 if (definedColumnNamesSet.find(columnNames[i]) != definedColumnNamesSet.end())
2682 origin = "Define";
2683 ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes)
2684 << columnTypes[i] << origin;
2685 if (i < nCols - 1) // no trailing newline after the last row
2686 ss << '\n';
2687 }
2688 // Use the string returned from DescribeDataset() as the 'brief' description
2689 // Use the converted to string stringstream ss as the 'full' description
2690 return RDFDescription(DescribeDataset(), ss.str());
2691 }
2692
2693 /// \brief Returns the names of the filters created.
2694 /// \return the container of filters names.
2695 ///
2696 /// If called on a root node, all the filters in the computation graph will
2697 /// be printed. For any other node, only the filters upstream of that node.
2698 /// Filters without a name are printed as "Unnamed Filter"
2699 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2700 ///
2701 /// ### Example usage:
2702 /// ~~~{.cpp}
2703 /// auto filtNames = d.GetFilterNames();
2704 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
2705 /// ~~~
2706 ///
2707 std::vector<std::string> GetFilterNames() { std::vector<std::string> filterNames = RDFInternal::GetFilterNames(fProxiedPtr); return filterNames; }
2708
2709 /// \brief Returns the names of the defined columns.
2710 /// \return the container of the defined column names.
2711 ///
2712 /// This is not an action nor a transformation, just a simple utility to
2713 /// get the columns names that have been defined up to the node.
2714 /// If no column has been defined, e.g. on a root node, it returns an
2715 /// empty collection.
2716 ///
2717 /// ### Example usage:
2718 /// ~~~{.cpp}
2719 /// auto defColNames = d.GetDefinedColumnNames();
2720 /// // Print defined columns' names
2721 /// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
2722 /// ~~~
2723 ///
2725 { // NOTE(review): the declaration line (2724) is absent from this listing
2726 ColumnNames_t definedColumns;
2727
2728 auto columns = fColRegister.GetColumns();
2729
2730 for (const auto &column : columns) { // column.first is used as the column name
2731 if (!RDFInternal::IsInternalColumn(column.first)) // internal columns are left out of the result
2732 definedColumns.emplace_back(column.first);
2733 }
2734
2735 return definedColumns;
2736 }
2737
2738 /// \brief Return a descriptor for the systematic variations registered in this branch of the computation graph.
2739 ///
2740 /// This is not an action nor a transformation, just a simple utility to
2741 /// inspect the systematic variations that have been registered with Vary() up to this node.
2742 /// When called on the root node, it returns an empty descriptor.
2743 ///
2744 /// ### Example usage:
2745 /// ~~~{.cpp}
2746 /// auto variations = d.GetVariations();
2747 /// variations.Print();
2748 /// ~~~
2749 ///
2751 { // NOTE(review): the declaration line (2750) is absent from this listing, so the return type is not visible here
2752 return {fColRegister.GetVariations()}; // list-initialises the return value from the register's variations
2753 }
2754
2755 /// \brief Checks if a column is present in the dataset.
2756 /// \return true if the column is available, false otherwise
2757 ///
2758 /// This method checks if a column is part of the input ROOT dataset, has
2759 /// been defined or can be provided by the data source.
2760 ///
2761 /// Example usage:
2762 /// ~~~{.cpp}
2763 /// ROOT::RDataFrame base(1);
2764 /// auto rdf = base.Define("definedColumn", [](){return 0;});
2765 /// rdf.HasColumn("definedColumn"); // true: we defined it
2766 /// rdf.HasColumn("rdfentry_"); // true: it's always there
2767 /// rdf.HasColumn("foo"); // false: it is not there
2768 /// ~~~
2769 bool HasColumn(std::string_view columnName)
2770 {
2771 // Lookup order: column register first, then the tree's branches, then the data source.
2772 const auto isTreeBranch = [&]() {
2773 if (!fLoopManager->GetTree())
2774 return false;
2775 const auto &treeBranches = fLoopManager->GetBranchNames();
2776 return std::find(treeBranches.begin(), treeBranches.end(), columnName) != treeBranches.end();
2777 };
2778 const auto isDataSourceColumn = [&]() { return fDataSource && fDataSource->HasColumn(columnName); };
2779
2780 // || short-circuits, so a later lookup only runs when the earlier ones found nothing
2781 return fColRegister.HasName(columnName) || isTreeBranch() || isDataSourceColumn();
2782 }
2786
2787 /// \brief Gets the number of data processing slots.
2788 /// \return The number of data processing slots used by this RDataFrame instance
2789 ///
2790 /// This method returns the number of data processing slots used by this RDataFrame
2791 /// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
2792 ///
2793 /// Example usage:
2794 /// ~~~{.cpp}
2795 /// ROOT::EnableImplicitMT(6);
2796 /// ROOT::RDataFrame df(1);
2797 /// std::cout << df.GetNSlots() << std::endl; // prints "6"
2798 /// ~~~
2799 unsigned int GetNSlots() const { const unsigned int nSlots = fLoopManager->GetNSlots(); return nSlots; }
2800
2801 /// \brief Gets the number of event loops run.
2802 /// \return The number of event loops run by this RDataFrame instance
2803 ///
2804 /// This method returns the number of event loops run so far by this RDataFrame instance.
2805 ///
2806 /// Example usage:
2807 /// ~~~{.cpp}
2808 /// ROOT::RDataFrame df(1);
2809 /// std::cout << df.GetNRuns() << std::endl; // prints "0"
2810 /// df.Sum("rdfentry_").GetValue(); // trigger the event loop
2811 /// std::cout << df.GetNRuns() << std::endl; // prints "1"
2812 /// df.Sum("rdfentry_").GetValue(); // trigger another event loop
2813 /// std::cout << df.GetNRuns() << std::endl; // prints "2"
2814 /// ~~~
2815 unsigned int GetNRuns() const { const unsigned int nRuns = fLoopManager->GetNRuns(); return nRuns; }
2816
2817 // clang-format off
2818 ////////////////////////////////////////////////////////////////////////////
2819 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2820 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2821 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2822 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2823 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2824 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2825 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2826 /// \param[in] aggIdentity The aggregator variable of each thread is initialised to this value (or is default-constructed if the parameter is omitted)
2827 /// \return the result of the aggregation wrapped in a RResultPtr.
2828 ///
2829 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2830 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2831 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2832 /// the value of the column columnName.
2833 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2834 /// Otherwise the signature of aggregator must be `void(U&,T)`.
2835 ///
2836 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2837 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2838 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2839 ///
2840 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2841 ///
2842 /// Example usage:
2843 /// ~~~{.cpp}
2844 /// auto aggregator = [](double acc, double x) { return acc * x; };
2845 /// ROOT::EnableImplicitMT();
2846 /// // If multithread is enabled, the aggregator function will be called by more threads
2847 /// // and will produce a vector of partial accumulators.
2848 /// // The merger function performs the final aggregation of these partial results.
2849 /// auto merger = [](std::vector<double> &accumulators) {
2850 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2851 /// accumulators[0] *= accumulators[i];
2852 /// }
2853 /// };
2854 ///
2855 /// // The accumulator is initialized at this value by every thread.
2856 /// double initValue = 1.;
2857 ///
2858 /// // Multiplies all elements of the column "x"
2859 /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2860 /// ~~~
2861 // clang-format on
2862 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2863 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2864 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2865 typename U = TTraits::TakeFirstParameter_t<ArgTypes>, // first parameter type of the aggregator callable
2866 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>> // second parameter type of the aggregator callable
2867 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2868 {
2869 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2870 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); // empty list when no column name was passed
2871
2872 const auto validColumnNames = GetValidatedColumnNames(1, columns); // exactly one column is requested
2873 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
2874
2875 auto accObjPtr = std::make_shared<U>(aggIdentity); // result object, copy-constructed from aggIdentity
2876 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>; // NOTE(review): lines 2877 and 2880 are absent from this listing
2878 auto action = std::make_unique<Action_t>(
2879 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2881 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2882 }
2883
2884 // clang-format off
2885 ////////////////////////////////////////////////////////////////////////////
2886 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2887 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2888 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2889 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2890 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2891 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2892 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2893 /// \return the result of the aggregation wrapped in a RResultPtr.
2894 ///
2895 /// See previous Aggregate overload for more information.
2896 // clang-format on
2897 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2898 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2899 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2900 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2901 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2902 {
2903 static_assert(std::is_default_constructible<U>::value, // no aggIdentity here, so U itself must provide the start value
2904 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2905 const U defaultIdentity = U(); // value-initialised accumulator seed
2906 return Aggregate(std::move(aggregator), std::move(merger), columnName, defaultIdentity); // delegate to the four-argument overload
2907 }
2908
2909 // clang-format off
2910 ////////////////////////////////////////////////////////////////////////////
2911 /// \brief Book execution of a custom action using a user-defined helper object.
2912 /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present.
2913 /// \tparam OtherColumns A list of the types of the other columns used by this action
2914 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2915 /// \param[in] helper The Action Helper to be scheduled.
2916 /// \param[in] columns The names of the columns on which the helper acts.
2917 /// \return the result of the helper wrapped in a RResultPtr.
2918 ///
2919 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2920 /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2921 /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2922 ///
2923 /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2924 /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2925 /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2926 /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2927 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2928 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2929 /// the requested columns for the particular entry being processed.
2930 /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2931 /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2932 /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2933 /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations.
2934 /// It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper,
2935 /// or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
2936 /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2937 /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2938 /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2939 /// method concurrently, but will always pass different 'slot' numbers.
2940 /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2941 /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
2942 /// before Initialize(), because the RResultPtr is constructed before the event loop is started.
2943 ///
2944 /// In case this is called without specifying column types, jitting is used,
2945 /// and the Helper class needs to be known to the interpreter.
2946 ///
2947 /// See ActionHelpers.hxx for the helpers used by standard RDF actions.
2948 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2949 // clang-format on
2950
2951 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper> // NOTE(review): signature line 2952 and alias line 2956 are absent from this listing
2953 {
2954 using HelperT = std::decay_t<Helper>; // helper type with references and cv-qualifiers stripped
2955 // TODO add more static sanity checks on Helper
2957 static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value, // the pointer-convertibility test rejects non-public inheritance
2958 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2959
2960 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper)); // helper is forwarded into shared storage
2961 auto resPtr = hPtr->GetResultPtr(); // result pointer is obtained right away, at booking time
2962
2963 if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) { // neither column types nor column names were given
2964 return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
2965 } else {
2966 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
2967 columns.size()); // column count is passed explicitly as nColumns
2968 }
2969 }
2970
2971 ////////////////////////////////////////////////////////////////////////////
2972 /// \brief Provides a representation of the columns in the dataset.
2973 /// \tparam ColumnTypes variadic list of branch/column types.
2974 /// \param[in] columnList Names of the columns to be displayed.
2975 /// \param[in] nRows Number of events for each column to be displayed.
2976 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2977 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2978 ///
2979 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2980 /// form. RDisplay will either print on the standard output a summarized version through `Print()` or will return a
2981 /// complete version through `AsString()`.
2982 ///
2983 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
2984 /// RResultPtr.
2985 ///
2986 /// Example usage:
2987 /// ~~~{.cpp}
2988 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2989 /// auto d1 = d.Display("");
2990 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2991 /// auto d2 = d.Display({"x", "y"}, 128);
2992 /// // Printing the short representations, the event loop will run
2993 /// d1->Print();
2994 /// d2->Print();
2995 /// ~~~
2996 template <typename... ColumnTypes>
2997 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
2998 {
2999 CheckIMTDisabled("Display");
3000 ColumnNames_t shownCols{"rdfentry_"}; // the entry number is always shown as the leading column
3001 shownCols.insert(shownCols.end(), columnList.begin(), columnList.end());
3002 const auto display = std::make_shared<RDisplay>(shownCols, GetColumnTypeNamesList(shownCols), nMaxCollectionElements);
3003 using HelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3004 // ULong64_t is prepended to the user-provided types to account for the leading rdfentry_ column
3005 return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>(
3006 std::move(shownCols), display, std::make_shared<HelperArgs_t>(nRows, display));
3007 }
3008
3009 ////////////////////////////////////////////////////////////////////////////
3010 /// \brief Provides a representation of the columns in the dataset.
3011 /// \param[in] columnList Names of the columns to be displayed.
3012 /// \param[in] nRows Number of events for each column to be displayed.
3013 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3014 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3015 ///
3016 /// This overload automatically infers the column types.
3017 /// See the previous overloads for further details.
3018 ///
3019 /// Invoked when no types are specified to Display
3020 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3021 {
3022 CheckIMTDisabled("Display");
3023 ColumnNames_t shownCols{"rdfentry_"}; // the entry number is always shown as the leading column
3024 shownCols.insert(shownCols.end(), columnList.begin(), columnList.end());
3025 const auto display = std::make_shared<RDisplay>(shownCols, GetColumnTypeNamesList(shownCols), nMaxCollectionElements);
3026 using HelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3027 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>( // types are inferred, so the column count is passed explicitly
3028 std::move(shownCols), display, std::make_shared<HelperArgs_t>(nRows, display), columnList.size() + 1);
3029 }
3030
3031 ////////////////////////////////////////////////////////////////////////////
3032 /// \brief Provides a representation of the columns in the dataset.
3033 /// \param[in] columnNameRegexp A regular expression to select the columns.
3034 /// \param[in] nRows Number of events for each column to be displayed.
3035 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3036 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3037 ///
3038 /// The existing columns are matched against the regular expression. If the string provided
3039 /// is empty, all columns are selected.
3040 /// See the previous overloads for further details.
3042 Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10) // NOTE(review): the return-type line (3041) is absent from this listing
3043 {
3044 const auto columnNames = GetColumnNames(); // candidate names the regular expression is matched against
3045 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
3046 return Display(selectedColumns, nRows, nMaxCollectionElements); // delegate to a column-list overload
3047 }
3048
3049 ////////////////////////////////////////////////////////////////////////////
3050 /// \brief Provides a representation of the columns in the dataset.
3051 /// \param[in] columnList Names of the columns to be displayed.
3052 /// \param[in] nRows Number of events for each column to be displayed.
3053 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3054 ///
3055 /// See the previous overloads for further details.
3057 Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10) // NOTE(review): the return-type line (3056) is absent from this listing
3058 {
3059 ColumnNames_t selectedColumns(columnList); // copy the braced list into a ColumnNames_t
3060 return Display(selectedColumns, nRows, nMaxCollectionElements); // delegate to a column-list overload
3061 }
3062
3063private:
3065 { // NOTE(review): the declaration line (3064) and lines 3073/3083 are absent from this listing
3066 // Entry number column
3067 const std::string entryColName = "rdfentry_";
3068 const std::string entryColType = "ULong64_t";
3069 auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; }; // ignores its first argument and returns the entry number unchanged
3070 using NewColEntry_t = RDFDetail::RDefine<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
3071
3072 auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
3074 fColRegister.AddColumn(entryColumn);
3075
3076 // Slot number column
3077 const std::string slotColName = "rdfslot_";
3078 const std::string slotColType = "unsigned int";
3079 auto slotColGen = [](unsigned int slot) { return slot; }; // returns the slot number unchanged
3080 using NewColSlot_t = RDFDetail::RDefine<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
3081
3082 auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), ColumnNames_t{},
3084 fColRegister.AddColumn(slotColumn);
3085
3086 fColRegister.AddAlias("tdfentry_", entryColName); // tdf-prefixed aliases for the two columns registered above
3087 fColRegister.AddAlias("tdfslot_", slotColName);
3088 }
3089
3090 std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
3091 {
3092 std::vector<std::string> types; // types[i] is the type name of columnList[i]
3093 types.reserve(columnList.size()); // final size is known up front: one allocation instead of repeated growth
3094 for (const auto &column : columnList) { // bind by reference: avoids copying every column name
3095 types.push_back(GetColumnType(column));
3096 }
3097 return types;
3098 }
3099
3100 void CheckIMTDisabled(std::string_view callerName)
3101 { // NOTE(review): line 3102 is absent from this listing; the two closing braces below imply it opens the block guarding the throw
3103 std::string error(callerName); // the message starts with the name of the calling method
3104 error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
3105 throw std::runtime_error(error);
3106 }
3107 }
3108
3109 /// Create RAction object, return RResultPtr for the action
3110 /// Overload for the case in which all column types were specified (no jitting).
3111 /// For most actions, `r` and `helperArg` will refer to the same object, because the only argument to forward to
3112 /// the action helper is the result value itself. We need the distinction for actions such as Snapshot or Cache,
3113 /// for which the constructor arguments of the action helper are different from the returned value.
/// \param[in] columns Column names requested by the caller; validated against `sizeof...(ColTypes)`.
/// \param[in] r Result object handed to MakeResultPtr.
/// \param[in] helperArg Object forwarded to RDFInternal::BuildAction.
/// The trailing unnamed `int` parameter is not used here; it mirrors the `nColumns` parameter of the
/// jitting overload so both overloads accept the same argument list.
3114 template <typename ActionTag, typename... ColTypes, typename ActionResultType,
3115 typename HelperArgType = ActionResultType,
3116 std::enable_if_t<!RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
3118 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
3119 const std::shared_ptr<HelperArgType> &helperArg, const int /*nColumns*/ = -1)
3120 {
// With all column types given, the number of required columns is fixed at compile time.
3121 constexpr auto nColumns = sizeof...(ColTypes);
3122
3123 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
3125
3126 const auto nSlots = fLoopManager->GetNSlots();
3127
// Build the concrete action from the validated columns, then wrap it together with `r`
// into the RResultPtr returned to the caller.
3128 auto action = RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots, fProxiedPtr, ActionTag{},
3129 fColRegister);
3130 return MakeResultPtr(r, *fLoopManager, std::move(action));
3131 }
3132
3133 /// Create RAction object, return RResultPtr for the action
3134 /// Overload for the case in which one or more column types were not specified (RTTI + jitting).
3135 /// This overload has a `nColumns` optional argument. If present, the number of required columns for
3136 /// this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(ColTypes).
3137 template <typename ActionTag, typename... ColTypes, typename ActionResultType,
3138 typename HelperArgType = ActionResultType,
3139 std::enable_if_t<RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
3140 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
3141 const std::shared_ptr<HelperArgType> &helperArg, const int nColumns = -1)
3142 {
3143 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(ColTypes));
3144
3145 const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
3146 const unsigned int nSlots = fLoopManager->GetNSlots();
3147
3148 auto *tree = fLoopManager->GetTree();
3149 auto *helperArgOnHeap = RDFInternal::MakeSharedOnHeap(helperArg);
3150
3151 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
3152 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
3153 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister, fDataSource);
3154
3155 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(
3156 *fLoopManager, validColumnNames, fColRegister, fProxiedPtr->GetVariations());
3157 auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
3158
3159 auto toJit = RDFInternal::JitBuildAction(
3160 validColumnNames, upcastNodeOnHeap, typeid(std::shared_ptr<HelperArgType>), typeid(ActionTag), helperArgOnHeap,
3161 tree, nSlots, fColRegister, fDataSource, jittedActionOnHeap);
3162 fLoopManager->ToJitExec(toJit);
3163 return MakeResultPtr(r, *fLoopManager, std::move(jittedAction));
3164 }
3165
/// Implementation shared by the Define-like methods for callables whose return type is default-constructible.
/// \param[in] name Name of the column being defined.
/// \param[in] expression Callable that computes the column value.
/// \param[in] columns Names of the input columns passed to `expression`.
/// \param[in] where Name of the calling method; in the lines visible here it is only inspected to tell
///                  whether it starts with "Redefine".
/// \return A new RInterface on the same node whose column register also contains the new column.
3166 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
3167 std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>
3168 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
3169 {
3171 if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
3174 } else {
3178 }
3179
// Work out the types of the input columns from the callable's argument list: drop the first argument
// for DefineType == Slot, and the first two for DefineType == SlotAndEntry.
3180 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
3181 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
3182 std::is_same<DefineType, RDFDetail::CustomColExtraArgs::Slot>::value, ArgTypes_t>::type;
3183 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
3184 std::is_same<DefineType, RDFDetail::CustomColExtraArgs::SlotAndEntry>::value, ColTypesTmp_t>::type;
3185
3186 constexpr auto nColumns = ColTypes_t::list_size;
3187
3188 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
3189 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
3190
3191 // Declare return type to the interpreter, for future use by jitted actions
3192 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3193 if (retTypeName.empty()) {
3194 // The type is not known to the interpreter.
3195 // We must not error out here, only if/when this column is used in jitted code; until then a
// placeholder name built from the demangled type is recorded instead.
3196 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3197 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3198 }
3199
3200 using NewCol_t = RDFDetail::RDefine<F, DefineType>;
3201 auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
3203
3205 newCols.AddColumn(newColumn);
3206
// The returned interface wraps the same node and loop manager but carries the extended column register.
3207 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
3208
3209 return newInterface;
3210 }
3211
3212 // This overload is chosen when the callable passed to Define or DefineSlot returns a type that is not
// default-constructible (void being one such type) and the callable is not convertible to std::string.
3213 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
3214 // this way compilation of `Define` has no way to continue after throwing the error.
3215 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
3216 bool IsFStringConv = std::is_convertible<F, std::string>::value,
3217 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
3218 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
3219 DefineImpl(std::string_view, F, const ColumnNames_t &)
3220 {
// The asserted condition is false whenever this overload is selected with the default template
// arguments, so instantiating it produces the diagnostic below.
3221 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
3222 "Error in `Define`: type returned by expression is not default-constructible");
3223 return *this; // never reached
3224 }
3225
/// Implementation of Snapshot: books a Snapshot action for the given columns.
/// \param[in] fullTreeName Tree path; split into directory and tree name by RDFInternal::ParseTreePath.
/// \param[in] filename Name of the output file.
/// \param[in] columnList Columns to write; passed through RDFInternal::FilterArraySizeColNames first.
/// \param[in] options Snapshot options; `fLazy` decides whether the result is dereferenced before returning.
/// \return The RResultPtr produced by CreateAction for the booked Snapshot action.
3226 template <typename... ColumnTypes>
3227 RResultPtr<RInterface<RLoopManager>> SnapshotImpl(std::string_view fullTreeName, std::string_view filename,
3228 const ColumnNames_t &columnList, const RSnapshotOptions &options)
3229 {
3230 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3231
// The number of template types must be compatible with the number of remaining column names.
3232 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size());
3233 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
3236
3237 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
3238 const auto &treename = parsedTreePath.fTreeName;
3239 const auto &dirname = parsedTreePath.fDirName;
3240
// Bundle everything the snapshot helper needs; this is the `helperArg` passed to CreateAction,
// distinct from the returned value `newRDF`.
3241 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
3242 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
3243
3245 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, validCols);
3246
3247 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs);
3248
// Unless a lazy snapshot was requested, dereference the result right away
// (presumably this triggers the event loop -- confirm against RResultPtr).
3249 if (!options.fLazy)
3250 *resPtr;
3251 return resPtr;
3252 }
3253
3254 ////////////////////////////////////////////////////////////////////////////
3255 /// \brief Implementation of cache.
3256 template <typename... ColTypes, std::size_t... S>
3257 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
3258 {
3259 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3260
3261 // Check at compile time that the columns types are copy constructible
3262 constexpr bool areCopyConstructible =
3263 RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value;
3264 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
3265
3266 RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
3267
3268 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
3269 auto ds = std::make_unique<RLazyDS<ColTypes...>>(
3270 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
3271
3272 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
3273
3274 return cachedRDF;
3275 }
3276
/// Book an action that takes no input columns.
/// The trailing return type is only well-formed when `hPtr->Exec(0u)` is a valid expression, so this
/// overload drops out of overload resolution for helpers whose Exec cannot be called with just an unsigned int.
/// \param[in] resPtr Result object forwarded to CreateAction.
/// \param[in] hPtr Helper forwarded to CreateAction.
3277 template <typename Helper, typename ActionResultType>
3278 auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
3279 const std::shared_ptr<Helper> &hPtr,
3281 -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
3282 {
// Empty column list, explicit column count of zero.
3283 return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, 0u);
3284 }
3285
3286 template <typename Helper, typename ActionResultType, typename... Others>
3287 RResultPtr<ActionResultType>
3288 CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
3289 const std::shared_ptr<Helper>& /*hPtr*/,
3290 Others...)
3291 {
3292 throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
3293 "columns! The action helper type was ") +
3294 typeid(Helper).name());
3295 return {};
3296 }
3297
3298protected:
/// Build an RInterface from its four constituents.
/// \param[in] proxied Shared pointer to the node this interface wraps; copied into fProxiedPtr.
/// \param[in] lm Loop manager; only its address is stored (fLoopManager).
/// \param[in] columns Column register; copied into fColRegister.
/// \param[in] ds Data source pointer stored as-is in fDataSource; may be null.
3299 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &columns,
3300 RDataSource *ds)
3301 : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fColRegister(columns)
3302 {
3303 }
3304
3306
3307 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
3308
/// Return the column names to use for an operation that needs `nColumns` columns, given the
/// user-provided list `columns`.
/// NOTE(review): the body is not visible in this listing; presumably it forwards to
/// RDFInternal::GetValidatedColumnNames with this node's loop manager, column register and data source -- confirm.
3309 ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
3310 {
3312 }
3313
/// When a data source is attached, hand the validated columns and their type list to
/// RDFInternal::AddDSColumns together with the loop manager, the data source and this node's column
/// register. Does nothing when fDataSource is null.
3314 template <typename... ColumnTypes>
3316 {
3317 if (fDataSource != nullptr)
3318 RDFInternal::AddDSColumns(validCols, *fLoopManager, *fDataSource, typeList, fColRegister);
3319 }
3320};
3321
3322} // namespace RDF
3323
3324} // namespace ROOT
3325
3326#endif // ROOT_RDF_TINTERFACE
ROOT::R::TRInterface & r
Definition Object.C:4
#define f(i)
Definition RSha256.hxx:104
#define h(i)
Definition RSha256.hxx:106
unsigned int UInt_t
Definition RtypesCore.h:46
unsigned long long ULong64_t
Definition RtypesCore.h:81
#define R__ASSERT(e)
Definition TError.h:118
constexpr Int_t kError
Definition TError.h:46
char name[80]
Definition TGX11.cxx:110
int type
Definition TGX11.cxx:121
The head node of a RDF computation graph.
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
RDataSource * GetDataSource() const
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
Helper class that provides the operation graph nodes.
A RDataFrame node that produces a result.
Definition RAction.hxx:53
A binder for user-defined columns and aliases.
void AddVariation(const std::shared_ptr< RVariationBase > &variation)
Register a new systematic variation.
const DefinesMap_t & GetColumns() const
Returns a map of pointers to the defined columns.
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
std::string ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
void AddColumn(const std::shared_ptr< RDFDetail::RDefineBase > &column)
Add a new booked column.
const VariationsMap_t & GetVariations() const
Returns the multimap of systematic variations, see fVariations.
ColumnNames_t GetNames() const
Returns the list of the names of the defined columns (Defines + Aliases).
void AddAlias(std::string_view alias, std::string_view colName)
Add a new alias to the ledger.
A DFDescription contains useful information about a given RDataFrame computation graph.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual bool HasColumn(std::string_view colName) const =0
Checks if the dataset has a certain column.
virtual std::string GetLabel()
Return a string representation of the datasource type.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
The public interface to the RDataFrame federation of classes.
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RVariationsDescription GetVariations()
Return a descriptor for the systematic variations registered in this branch of the computation graph.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action).
RLoopManager * GetLoopManager() const
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing column.
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing column.
std::string DescribeDataset() const
RResultPtr<::TGraph > Graph(std::string_view v1Name="", std::string_view v2Name="")
Fill and return a graph (lazy action).
RResultPtr< ActionResultType > CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &, const std::shared_ptr< Helper > &, Others...)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot.
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action).
std::enable_if_t< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > > DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
RInterface & operator=(RInterface &&)=default
Move-assignment operator for RInterface.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns using auto-generated tags.
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action).
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int nColumns=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which one or more co...
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Redefine(std::string_view name, std::string_view expression)
Overwrite the value and/or type of an existing column.
auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &resPtr, const std::shared_ptr< Helper > &hPtr, TTraits::TypeList< RDFDetail::RInferredType >) -> decltype(hPtr->Exec(0u), RResultPtr< ActionResultType >{})
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view fullTreeName, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which all column typ...
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< RLoopManager > &proxied)
Build a RInterface from a RLoopManager.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, std::string_view expression)
Define a new column that is updated when the input sample changes.
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow referring to a column by a different name.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Redefine(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
RResultPtr< typename std::decay_t< Helper >::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RLoopManager * fLoopManager
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action).
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column using auto-generated variation tags.
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for one or more existing columns.
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Define a new column.
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action).
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
Register a systematic variation that affects multiple columns simultaneously.
RInterface< Proxied, DS_t > RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... >)
Implementation of cache.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action).
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action).
RResultPtr< std::decay_t< T > > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action).
std::enable_if_t<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > > DefineImpl(std::string_view, F, const ColumnNames_t &)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RInterface< Proxied, DS_t > RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot and the current entry.
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action).
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
Fill and return a two-dimensional profile (lazy action).
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNRuns() const
Gets the number of event loops run.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action).
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &columns, RDataSource *ds)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RDFDescription Describe()
Return information about the dataframe.
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action).
A RDataSource implementation which is built on top of result proxies.
Smart pointer for the return type of actions.
A descriptor for the systematic variations known to a given RDataFrame node.
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTree,...
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
A chain is a collection of files containing TTree objects.
Definition TChain.h:33
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
Statistical variable, defined by its mean and variance (RMS).
Definition TStatistic.h:33
const Int_t n
Definition legend1.C:16
#define F(x, y, z)
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Vary call.
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition RDFUtils.cxx:51
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
void CheckValidCppVarName(std::string_view var, const std::string &where)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:224
void RemoveDuplicates(ColumnNames_t &columnNames)
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:99
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &customColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
Definition RDFUtils.cxx:374
std::string PrettyPrintAddr(const void *const addr)
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
ColumnNames_t GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
std::string DemangleTypeIdName(const std::type_info &typeInfo)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
bool IsInternalColumn(std::string_view colName)
Whether the custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:365
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting with '#' filtered out.
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &customCols, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(const std::vector< std::string > &branches, TTree *tree, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols)
Throw if the column has systematic variations attached.
void TriggerRun(ROOT::RDF::RNode &node)
Trigger the execution of an RDataFrame computation graph.
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &customCols, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
RFriendInfo GetFriendInfo(const TTree &tree)
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
std::vector< std::string > ColumnNames_t
Definition Utils.hxx:35
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
ROOT type_traits extensions.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow forward declaring tb...
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:527
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:558
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition TROOT.cxx:544
Definition graph.py:1
Definition tree.py:1
type is TypeList if MustRemove is false, otherwise it is a TypeList with the first type removed
Definition Utils.hxx:139
A collection of options to steer the creation of the dataset on file.
bool fLazy
Do not start the event loop when Snapshot is called.
A struct which stores the parameters of a TH1D.
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores the parameters of a TH2D.
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores the parameters of a TH3D.
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores the parameters of a THnD.
std::shared_ptr<::THnD > GetHistogram() const
A struct which stores the parameters of a TProfile.
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores the parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.