Logo ROOT  
Reference Guide
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/InternalTreeUtils.hxx" // for GetFileNamesFromTree and GetFriendInfo
15#include "ROOT/RDataSource.hxx"
20#include "ROOT/RDF/RDefine.hxx"
22#include "ROOT/RDF/RFilter.hxx"
26#include "ROOT/RDF/RRange.hxx"
27#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/RResultPtr.hxx"
32#include "ROOT/RStringView.hxx"
33#include "ROOT/RVec.hxx"
34#include "ROOT/TypeTraits.hxx"
35#include "RtypesCore.h" // for ULong64_t
36#include "TChain.h" // for checking fLoopManger->GetTree() return type
37#include "TDirectory.h"
38#include "TH1.h" // For Histo actions
39#include "TH2.h" // For Histo actions
40#include "TH3.h" // For Histo actions
41#include "THn.h"
42#include "TProfile.h"
43#include "TProfile2D.h"
44#include "TStatistic.h"
45
46#include <algorithm>
47#include <cstddef>
48#include <initializer_list>
49#include <iterator> // std::back_insterter
50#include <limits>
51#include <memory>
52#include <set>
53#include <sstream>
54#include <stdexcept>
55#include <string>
56#include <type_traits> // is_same, enable_if
57#include <typeinfo>
58#include <unordered_set>
59#include <utility> // std::index_sequence
60#include <vector>
61
62class TGraph;
63
64// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
65namespace ROOT {
68void EnableImplicitMT(UInt_t numthreads);
69class RDataFrame;
70namespace Internal {
71namespace RDF {
73}
74} // namespace Internal
75} // namespace ROOT
76namespace cling {
77std::string printValue(ROOT::RDataFrame *tdf);
78}
79
80namespace ROOT {
81namespace RDF {
84namespace TTraits = ROOT::TypeTraits;
85
86template <typename Proxied, typename DataSource>
87class RInterface;
88
89using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
90
91// clang-format off
92/**
93 * \class ROOT::RDF::RInterface
94 * \ingroup dataframe
95 * \brief The public interface to the RDataFrame federation of classes.
96 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
97 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
98 *
99 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
100 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
101 */
102// clang-format on
103template <typename Proxied, typename DataSource = void>
105 using DS_t = DataSource;
109 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
111
112 template <typename T, typename W>
113 friend class RInterface;
114
115 friend void RDFInternal::TriggerRun(RNode &node);
116
117 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
118 ///< The RLoopManager at the root of this computation graph. Never null.
120 /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
122
123 /// Contains the columns defined up to this node.
125
126 std::string DescribeDataset() const
127 {
128 // TTree/TChain as input
129 const auto tree = fLoopManager->GetTree();
130 if (tree) {
131 const auto treeName = tree->GetName();
132 const auto isTChain = dynamic_cast<TChain *>(tree) ? true : false;
133 const auto treeType = isTChain ? "TChain" : "TTree";
134 const auto isInMemory = !isTChain && !tree->GetCurrentFile() ? true : false;
135 const auto friendInfo = ROOT::Internal::TreeUtils::GetFriendInfo(*tree);
136 const auto hasFriends = friendInfo.fFriendNames.empty() ? false : true;
137 std::stringstream ss;
138 ss << "Dataframe from " << treeType << " " << treeName;
139 if (isInMemory) {
140 ss << " (in-memory)";
141 } else {
143 const auto numFiles = files.size();
144 if (numFiles == 1) {
145 ss << " in file " << files[0];
146 } else {
147 ss << " in files\n";
148 for (auto i = 0u; i < numFiles; i++) {
149 ss << " " << files[i];
150 if (i < numFiles - 1)
151 ss << '\n';
152 }
153 }
154 }
155 if (hasFriends) {
156 const auto numFriends = friendInfo.fFriendNames.size();
157 if (numFriends == 1) {
158 ss << "\nwith friend\n";
159 } else {
160 ss << "\nwith friends\n";
161 }
162 for (auto i = 0u; i < numFriends; i++) {
163 const auto nameAlias = friendInfo.fFriendNames[i];
164 const auto files = friendInfo.fFriendFileNames[i];
165 const auto numFiles = files.size();
166 const auto subnames = friendInfo.fFriendChainSubNames[i];
167 ss << " " << nameAlias.first;
168 if (nameAlias.first != nameAlias.second)
169 ss << " (" << nameAlias.second << ")";
170 // case: TTree as friend
171 if (numFiles == 1) {
172 ss << " " << files[0];
173 }
174 // case: TChain as friend
175 else {
176 ss << '\n';
177 for (auto j = 0u; j < numFiles; j++) {
178 ss << " " << subnames[j] << " " << files[j];
179 if (j < numFiles - 1)
180 ss << '\n';
181 }
182 }
183 if (i < numFriends - 1)
184 ss << '\n';
185 }
186 }
187 return ss.str();
188 }
189 // Datasource as input
190 else if (fDataSource) {
191 const auto datasourceLabel = fDataSource->GetLabel();
192 return "Dataframe from datasource " + datasourceLabel;
193 }
194 // Trivial/empty datasource
195 else {
196 const auto n = fLoopManager->GetNEmptyEntries();
197 if (n == 1) {
198 return "Empty dataframe filling 1 row";
199 } else {
200 return "Empty dataframe filling " + std::to_string(n) + " rows";
201 }
202 }
203 }
204
205public:
206 ////////////////////////////////////////////////////////////////////////////
207 /// \brief Copy-assignment operator for RInterface.
208 RInterface &operator=(const RInterface &) = default;
209
210 ////////////////////////////////////////////////////////////////////////////
211 /// \brief Copy-ctor for RInterface.
212 RInterface(const RInterface &) = default;
213
214 ////////////////////////////////////////////////////////////////////////////
215 /// \brief Move-ctor for RInterface.
216 RInterface(RInterface &&) = default;
217
218 ////////////////////////////////////////////////////////////////////////////
219 /// \brief Move-assignment operator for RInterface.
221
222 ////////////////////////////////////////////////////////////////////////////
223 /// \brief Build a RInterface from a RLoopManager.
224 /// This constructor is only available for RInterface<RLoopManager>.
226 RInterface(const std::shared_ptr<RLoopManager> &proxied)
227 : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource()), fColRegister(proxied)
228 {
230 }
231
232 ////////////////////////////////////////////////////////////////////////////
233 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
234 /// Different RDataFrame methods return different C++ types. All nodes, however,
235 /// can be cast to this common type at the cost of a small performance penalty.
236 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
237 /// around via (non-template, C++11) helper functions.
238 /// Example usage:
239 /// ~~~{.cpp}
240 /// // a function that conditionally adds a Range to a RDataFrame node.
241 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
242 /// {
243 /// return mustAddRange ? df.Range(1) : df;
244 /// }
245 /// // use as :
246 /// ROOT::RDataFrame df(10);
247 /// auto maybeRanged = MaybeAddRange(df, true);
248 /// ~~~
249 /// Note that it is not a problem to pass RNode's by value.
250 operator RNode() const
251 {
252 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister,
254 }
255
256 ////////////////////////////////////////////////////////////////////////////
257 /// \brief Append a filter to the call graph.
258 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
259 /// signalling whether the event has passed the selection (true) or not (false).
260 /// \param[in] columns Names of the columns/branches in input to the filter function.
261 /// \param[in] name Optional name of this filter. See `Report`.
262 /// \return the filter node of the computation graph.
263 ///
264 /// Append a filter node at the point of the call graph corresponding to the
265 /// object this method is called on.
266 /// The callable `f` should not have side-effects (e.g. modification of an
267 /// external or static variable) to ensure correct results when implicit
268 /// multi-threading is active.
269 ///
270 /// RDataFrame only evaluates filters when necessary: if multiple filters
271 /// are chained one after another, they are executed in order and the first
272 /// one returning false causes the event to be discarded.
273 /// Even if multiple actions or transformations depend on the same filter,
274 /// it is executed once per entry. If its result is requested more than
275 /// once, the cached result is served.
276 ///
277 /// ### Example usage:
278 /// ~~~{.cpp}
279 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
280 /// auto filtered = df.Filter(myCut, {"x", "y"});
281 ///
282 /// // String: it must contain valid C++ except that column names can be used instead of variable names
283 /// auto filtered = df.Filter("x*y > 0");
284 /// ~~~
287 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
288 {
289 RDFInternal::CheckFilter(f);
290 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
291 constexpr auto nColumns = ColTypes_t::list_size;
292 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
293 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
294
296
297 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name);
298 fLoopManager->Book(filterPtr.get());
299 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister, fDataSource);
300 }
301
302 ////////////////////////////////////////////////////////////////////////////
303 /// \brief Append a filter to the call graph.
304 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
305 /// signalling whether the event has passed the selection (true) or not (false).
306 /// \param[in] name Optional name of this filter. See `Report`.
307 /// \return the filter node of the computation graph.
308 ///
309 /// Refer to the first overload of this method for the full documentation.
312 {
313 // The sfinae is there in order to pick up the overloaded method which accepts two strings
314 // rather than this template method.
315 return Filter(f, {}, name);
316 }
317
318 ////////////////////////////////////////////////////////////////////////////
319 /// \brief Append a filter to the call graph.
320 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
321 /// signalling whether the event has passed the selection (true) or not (false).
322 /// \param[in] columns Names of the columns/branches in input to the filter function.
323 /// \return the filter node of the computation graph.
324 ///
325 /// Refer to the first overload of this method for the full documentation.
326 template <typename F>
327 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
328 {
329 return Filter(f, ColumnNames_t{columns});
330 }
331
332 ////////////////////////////////////////////////////////////////////////////
333 /// \brief Append a filter to the call graph.
334 /// \param[in] expression The filter expression in C++
335 /// \param[in] name Optional name of this filter. See `Report`.
336 /// \return the filter node of the computation graph.
337 ///
338 /// The expression is just-in-time compiled and used to filter entries. It must
339 /// be valid C++ syntax in which variable names are substituted with the names
340 /// of branches/columns.
341 ///
342 /// ### Example usage:
343 /// ~~~{.cpp}
344 /// auto filtered_df = df.Filter("myCollection.size() > 3");
345 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minumum collection size");
346 /// ~~~
348 {
349 // deleted by the jitted call to JitFilterHelper
350 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
351 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
352 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister, fDataSource);
353 const auto jittedFilter =
356
357 fLoopManager->Book(jittedFilter.get());
360 }
361
362 // clang-format off
363 ////////////////////////////////////////////////////////////////////////////
364 /// \brief Define a new column.
365 /// \param[in] name The name of the defined column.
366 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
367 /// \param[in] columns Names of the columns/branches in input to the producer function.
368 /// \return the first node of the computation graph for which the new quantity is defined.
369 ///
370 /// Define a column that will be visible from all subsequent nodes
371 /// of the functional chain. The `expression` is only evaluated for entries that pass
372 /// all the preceding filters.
373 /// A new variable is created called `name`, accessible as if it was contained
374 /// in the dataset from subsequent transformations/actions.
375 ///
376 /// Use cases include:
377 /// * caching the results of complex calculations for easy and efficient multiple access
378 /// * extraction of quantities of interest from complex objects
379 ///
380 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
381 ///
382 /// ### Example usage:
383 /// ~~~{.cpp}
384 /// // assuming a function with signature:
385 /// double myComplexCalculation(const RVec<float> &muon_pts);
386 /// // we can pass it directly to Define
387 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
388 /// // alternatively, we can pass the body of the function as a string, as in Filter:
389 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
390 /// ~~~
393 {
394 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns, "Define");
395 }
396 // clang-format on
397
398 // clang-format off
399 ////////////////////////////////////////////////////////////////////////////
400 /// \brief Define a new column with a value dependent on the processing slot.
401 /// \param[in] name The name of the defined column.
402 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
403 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
404 /// \return the first node of the computation graph for which the new quantity is defined.
405 ///
406 /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
407 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
408 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
409 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
410 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
411 ///
412 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
413 /// ~~~{.cpp}
414 /// int function(unsigned int, double, double);
415 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
416 /// df.DefineSlot("x", function, {"column1", "column2"})
417 /// ~~~
418 ///
419 /// See Define for more information.
420 template <typename F>
422 {
423 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns, "DefineSlot");
424 }
425 // clang-format on
426
427 // clang-format off
428 ////////////////////////////////////////////////////////////////////////////
429 /// \brief Define a new column with a value dependent on the processing slot and the current entry.
430 /// \param[in] name The name of the defined column.
431 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
432 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
433 /// \return the first node of the computation graph for which the new quantity is defined.
434 ///
435 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
436 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
437 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
438 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
439 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
440 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
441 ///
442 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
443 /// ~~~{.cpp}
444 /// int function(unsigned int, ULong64_t, double, double);
445 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
446 /// DefineSlotEntry("x", function, {"column1", "column2"})
447 /// ~~~
448 ///
449 /// See Define for more information.
450 template <typename F>
452 {
453 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns,
454 "DefineSlotEntry");
455 }
456 // clang-format on
457
458 ////////////////////////////////////////////////////////////////////////////
459 /// \brief Define a new column.
460 /// \param[in] name The name of the defined column.
461 /// \param[in] expression An expression in C++ which represents the defined value
462 /// \return the first node of the computation graph for which the new quantity is defined.
463 ///
464 /// The expression is just-in-time compiled and used to produce the column entries.
465 /// It must be valid C++ syntax in which variable names are substituted with the names
466 /// of branches/columns.
467 ///
468 /// Refer to the first overload of this method for the full documentation.
470 {
471 constexpr auto where = "Define";
473 // these checks must be done before jitting lest we throw exceptions in jitted code
476
477 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
478 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
479 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
480 fLoopManager->Book(jittedDefine.get());
481
483 newCols.AddColumn(jittedDefine);
484
485 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
486
487 return newInterface;
488 }
489
490 ////////////////////////////////////////////////////////////////////////////
491 /// \brief Overwrite the value and/or type of an existing column.
492 /// \param[in] name The name of the column to redefine.
493 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
494 /// \param[in] columns Names of the columns/branches in input to the expression.
495 /// \return the first node of the computation graph for which the quantity is redefined.
496 ///
497 /// The old value of the column can be used as an input for the expression.
498 ///
499 /// An exception is thrown in case the column to redefine does not already exist.
500 /// See Define() for more information.
503 {
504 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns, "Redefine");
505 }
506
507 // clang-format off
508 ////////////////////////////////////////////////////////////////////////////
509 /// \brief Overwrite the value and/or type of an existing column.
510 /// \param[in] name The name of the column to redefine.
511 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
512 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
513 /// \return the first node of the computation graph for which the new quantity is defined.
514 ///
515 /// The old value of the column can be used as an input for the expression.
516 /// An exception is thrown in case the column to redefine does not already exist.
517 ///
518 /// See DefineSlot() for more information.
519 // clang-format on
520 template <typename F>
522 {
523 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns, "RedefineSlot");
524 }
525
526 // clang-format off
527 ////////////////////////////////////////////////////////////////////////////
528 /// \brief Overwrite the value and/or type of an existing column.
529 /// \param[in] name The name of the column to redefine.
530 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
531 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
532 /// \return the first node of the computation graph for which the new quantity is defined.
533 ///
534 /// The old value of the column can be used as an input for the expression.
535 /// An exception is thrown in case the column to re-define does not already exist.
536 ///
537 /// See DefineSlotEntry() for more information.
538 // clang-format on
539 template <typename F>
541 {
542 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns,
543 "RedefineSlotEntry");
544 }
545
546 ////////////////////////////////////////////////////////////////////////////
547 /// \brief Overwrite the value and/or type of an existing column.
548 /// \param[in] name The name of the column to redefine.
549 /// \param[in] expression An expression in C++ which represents the defined value
550 /// \return the first node of the computation graph for which the new quantity is defined.
551 ///
552 /// The expression is just-in-time compiled and used to produce the column entries.
553 /// It must be valid C++ syntax in which variable names are substituted with the names
554 /// of branches/columns.
555 ///
556 /// The old value of the column can be used as an input for the expression.
557 /// An exception is thrown in case the column to re-define does not already exist.
558 ///
559 /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
561 {
562 constexpr auto where = "Redefine";
567
568 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
569 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
570 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
571 fLoopManager->Book(jittedDefine.get());
572
574 newCols.AddColumn(jittedDefine);
575
576 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
577
578 return newInterface;
579 }
580
581 // clang-format off
582 ////////////////////////////////////////////////////////////////////////////
583 /// \brief Define a new column that is updated when the input sample changes.
584 /// \param[in] name The name of the defined column.
585 /// \param[in] expression A C++ callable that computes the new value of the defined column.
586 /// \return the first node of the computation graph for which the new quantity is defined.
587 ///
588 /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
589 /// where:
590 /// - `T` is the type of the defined column
591 /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
592 /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
593 /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
594 /// being processed (see the class docs for more information).
595 ///
596 /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
597 /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
598 /// starts rather than at every entry.
599 ///
600 /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often.
601 ///
602 /// ### Example usage:
603 /// ~~~{.cpp}
604 /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
605 /// df.DefinePerSample("weightbysample",
606 /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
607 /// { return id.Contains("sample1") ? 1.0f : 2.0f; });
608 /// ~~~
609 // clang-format on
610 // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
611 template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
613 {
614 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
617
618 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
619 if (retTypeName.empty()) {
620 // The type is not known to the interpreter.
621 // We must not error out here, but if/when this column is used in jitted code
622 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
623 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
624 }
625
626 auto newColumn =
627 std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager);
628
629 auto updateDefinePerSample = [newColumn](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
630 newColumn->Update(slot, id);
631 };
632 fLoopManager->AddSampleCallback(std::move(updateDefinePerSample));
633
635 newCols.AddColumn(std::move(newColumn));
636 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
637 return newInterface;
638 }
639
640 // clang-format off
641 ////////////////////////////////////////////////////////////////////////////
642 /// \brief Define a new column that is updated when the input sample changes.
643 /// \param[in] name The name of the defined column.
644 /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
645 /// \return the first node of the computation graph for which the new quantity is defined.
646 ///
647 /// The expression is just-in-time compiled and used to produce the column entries.
648 /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is
649 /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the
650 /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
651 ///
652 /// ### Example usage:
653 /// ~~~{.py}
654 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
655 /// df.DefinePerSample("weightbysample", "rdfsampleinfo_.Contains('sample1') ? 1.0f : 2.0f")
656 /// ~~~
657 ///
658 /// \note
659 /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
660 /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
661 /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
662 /// ~~~{.py}
663 /// ROOT.gInterpreter.Declare(
664 /// """
665 /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
666 /// return id.Contains("sample1") ? 1.0f : 2.0f;
667 /// }
668 /// """)
669 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
670 /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
671 /// ~~~
672 ///
673 /// \note
674 /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain
675 /// column names other than those mentioned above: the expression is evaluated once before the processing of the
676 /// sample even starts, so column values are not accessible.
677 // clang-format on
679 {
680 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
681 // these checks must be done before jitting lest we throw exceptions in jitted code
684
685 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
686 auto jittedDefine =
687 RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, upcastNodeOnHeap);
688 auto updateDefinePerSample = [jittedDefine](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
689 jittedDefine->Update(slot, id);
690 };
691 fLoopManager->AddSampleCallback(std::move(updateDefinePerSample));
692
694 newCols.AddColumn(jittedDefine);
695
696 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
697
698 return newInterface;
699 }
700
701 /// \brief Register systematic variations for an existing column.
702 /// \param[in] colName name of the column for which varied values are provided.
703 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
704 /// take any column values as input, similarly to what happens with Filter and Define calls. It must
705 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
706 /// \param[in] inputColumns the names of the columns to be passed to the callable.
707 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
708 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
709 ///
710 /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to
711 /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for
712 /// results that depend on any varied quantity a map/dictionary of varied results can be produced with
713 /// ROOT::RDF::Experimental::VariationsFor (see the example below).
714 ///
715 /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and
716 /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or
717 /// indirect dependencies of the column values on some registered variations). The keys will be a composition of
718 /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below.
719 ///
720 /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt.
721 /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):
722 /// ~~~{.cpp}
723 /// auto nominal_hx =
724 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"down", "up"})
725 /// .Filter("pt > k")
726 /// .Define("x", someFunc, {"pt"})
727 /// .Histo1D("x");
728 ///
729 /// auto hx = ROOT::RDF::VariationsFor(nominal_hx);
730 /// hx["nominal"].Draw();
731 /// hx["pt:down"].Draw("SAME");
732 /// ~~~
733 template <typename F>
734 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
735 const std::vector<std::string> &variationTags, std::string_view variationName = "")
736 {
737 std::vector<std::string> colNames{{std::string(colName)}};
738 const std::string theVariationName{variationName.empty() ? colName : variationName};
739
740 return Vary(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags, theVariationName);
741 }
742
743 /// \brief Register systematic variations for an existing columns using auto-generated variation tags.
744 /// This overload of Vary takes a nVariations parameter instead of a list of tag names. Tag names
745 /// will be auto-generated as the sequence 0...nVariations-1.
746 /// See the documentation of the previous overload for more information.
747 template <typename F>
748 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
749 std::size_t nVariations, std::string_view variationName = "")
750 {
751 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
752
753 std::vector<std::string> variationTags;
754 variationTags.reserve(nVariations);
755 for (std::size_t i = 0u; i < nVariations; ++i)
756 variationTags.emplace_back(std::to_string(i));
757
758 const std::string theVariationName{variationName.empty() ? colName : variationName};
759
760 return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName);
761 }
762
763 /// \brief Register a systematic variation that affects multiple columns simultaneously.
764 /// This overload of Vary takes a list of column names as first argument rather than a single name and
765 /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each
766 /// affected column.
767 /// See the documentation of the first Vary overload for more information.
768 ///
769 /// Example usage:
770 /// ~~~{.cpp}
771 /// // produce variations "ptAndEta:down" and "ptAndEta:up"
772 /// df.Vary({"pt", "eta"},
773 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
774 /// {"down", "up"},
775 /// "ptAndEta");
776 /// ~~~
777 template <typename F>
779 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
780 const std::vector<std::string> &variationTags, std::string_view variationName)
781 {
782 using F_t = std::decay_t<F>;
783 using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types;
784 using RetType = typename TTraits::CallableTraits<F_t>::ret_type;
785 constexpr auto nColumns = ColTypes_t::list_size;
786
787 SanityChecksForVary<RetType>(colNames, variationTags, variationName);
788
789 const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns);
790 CheckAndFillDSColumns(validColumnNames, ColTypes_t{});
791
792 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
793 if (retTypeName.empty()) {
794 // The type is not known to the interpreter, but we don't want to error out
795 // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name.
796 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
797 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
798 }
799
800 auto variation = std::make_shared<RDFInternal::RVariation<F_t>>(
801 colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager,
802 validColumnNames);
803 fLoopManager->Book(variation.get());
804
806 newCols.AddVariation(variation);
807
808 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
809
810 return newInterface;
811 }
812
813 /// \brief Register systematic variations for one or more existing columns using auto-generated tags.
814 /// This overload of Vary takes a nVariations parameter instead of a list of tag names. Tag names
815 /// will be auto-generated as the sequence 0...nVariations-1.
816 /// See the documentation of the previous overload for more information.
817 template <typename F>
819 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
820 std::size_t nVariations, std::string_view variationName)
821 {
822 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
823
824 std::vector<std::string> variationTags;
825 variationTags.reserve(nVariations);
826 for (std::size_t i = 0u; i < nVariations; ++i)
827 variationTags.emplace_back(std::to_string(i));
828
829 return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName);
830 }
831
832 /// \brief Register systematic variations for an existing column.
833 /// \param[in] colName name of the column for which varied values are provided.
834 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
835 /// values for the specified column.
836 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
837 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
838 /// colName is used if none is provided.
839 ///
840 /// ~~~{.cpp}
841 /// auto nominal_hx =
842 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
843 /// .Filter("pt > k")
844 /// .Define("x", someFunc, {"pt"})
845 /// .Histo1D("x");
846 ///
847 /// auto hx = ROOT::RDF::VariationsFor(nominal_hx);
848 /// hx["nominal"].Draw();
849 /// hx["pt:down"].Draw("SAME");
850 /// ~~~
852 const std::vector<std::string> &variationTags, std::string_view variationName = "")
853 {
854 std::vector<std::string> colNames{{std::string(colName)}};
855 const std::string theVariationName{variationName.empty() ? colName : variationName};
856
857 return Vary(std::move(colNames), expression, variationTags, theVariationName);
858 }
859
860 /// \brief Register systematic variations for an existing column.
861 /// \param[in] colName name of the column for which varied values are provided.
862 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
863 /// values for the specified column.
864 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be "0", "1", etc.
865 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
866 /// colName is used if none is provided.
867 ///
868 /// See the documentation for the previous overload for more information.
869 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
870 std::string_view variationName = "")
871 {
872 std::vector<std::string> colNames{{std::string(colName)}};
873 const std::string theVariationName{variationName.empty() ? colName : variationName};
874
875 return Vary(std::move(colNames), expression, nVariations, theVariationName);
876 }
877
878 /// \brief Register systematic variations for one or more existing columns.
879 /// \param[in] colNames names of the columns for which varied values are provided.
880 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
881 /// values for the specified columns.
882 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be "0", "1", etc.
883 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
884 ///
885 /// ~~~{.cpp}
886 /// auto nominal_hx =
887 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
888 /// .Histo1D("x", "y");
889 ///
890 /// auto hx = ROOT::RDF::VariationsFor(nominal_hx);
891 /// hx["nominal"].Draw();
892 /// hx["xy:0"].Draw("SAME");
893 /// hx["xy:1"].Draw("SAME");
894 /// ~~~
895 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
896 std::size_t nVariations, std::string_view variationName)
897 {
898 std::vector<std::string> variationTags;
899 variationTags.reserve(nVariations);
900 for (std::size_t i = 0u; i < nVariations; ++i)
901 variationTags.emplace_back(std::to_string(i));
902
903 return Vary(colNames, expression, std::move(variationTags), variationName);
904 }
905
906 /// \brief Register systematic variations for one or more existing columns.
907 /// \param[in] colNames names of the columns for which varied values are provided.
908 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
909 /// values for the specified columns.
910 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
911 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
912 ///
913 /// ~~~{.cpp}
914 /// auto nominal_hx =
915 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
916 /// .Histo1D("x", "y");
917 ///
918 /// auto hx = ROOT::RDF::VariationsFor(nominal_hx);
919 /// hx["nominal"].Draw();
920 /// hx["xy:down"].Draw("SAME");
921 /// hx["xy:up"].Draw("SAME");
922 /// ~~~
923 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
924 const std::vector<std::string> &variationTags, std::string_view variationName)
925 {
926 R__ASSERT(variationTags.size() > 0 && "Must have at least one variation.");
927 R__ASSERT(colNames.size() > 0 && "Must have at least one varied column.");
928 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
929
930 for (auto &colName : colNames) {
931 RDFInternal::CheckValidCppVarName(colName, "Vary");
934 }
935 RDFInternal::CheckValidCppVarName(variationName, "Vary");
936
937 // when varying multiple columns, they must be different columns
938 if (colNames.size() > 1) {
939 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
940 if (uniqueCols.size() != colNames.size())
941 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
942 }
943
944 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
945 auto jittedVariation =
946 RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager, fDataSource,
947 fColRegister, fLoopManager->GetBranchNames(), upcastNodeOnHeap);
948 fLoopManager->Book(jittedVariation.get());
949
951 newColRegister.AddVariation(std::move(jittedVariation));
952
953 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister), fDataSource);
954
955 return newInterface;
956 }
957
958 ////////////////////////////////////////////////////////////////////////////
959 /// \brief Allow to refer to a column with a different name.
960 /// \param[in] alias name of the column alias
961 /// \param[in] columnName of the column to be aliased
962 /// \return the first node of the computation graph for which the alias is available.
963 ///
964 /// Aliasing an alias is supported.
965 ///
966 /// ### Example usage:
967 /// ~~~{.cpp}
968 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
969 /// ~~~
971 {
972 // The symmetry with Define is clear. We want to:
973 // - Create globally the alias and return this very node, unchanged
974 // - Make aliases accessible based on chains and not globally
975
976 // Helper to find out if a name is a column
977 auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
978
979 constexpr auto where = "Alias";
981 // If the alias name is a column name, there is a problem
983
984 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
985
987 newCols.AddAlias(alias, validColumnName);
988
989 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
990
991 return newInterface;
992 }
993
994 ////////////////////////////////////////////////////////////////////////////
995 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
996 /// \tparam ColumnTypes variadic list of branch/column types.
997 /// \param[in] treename The name of the output TTree.
998 /// \param[in] filename The name of the output TFile.
999 /// \param[in] columnList The list of names of the columns/branches to be written.
1000 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1001 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1002 ///
1003 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
1004 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
1005 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
1006 /// written out and it appears before the array in the columnList.
1007 ///
1008 /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other
1009 /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are
1010 /// present, by default all friend top-level branches that have names that do not collide with
1011 /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out
1012 /// friend branches with the same names of branches in the main TTree/TChain with names of the form
1013 /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain.
1014 ///
1015 /// ### Writing to a sub-directory
1016 ///
1017 /// Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to
1018 /// the TTree as part of the TTree name, e.g. `df.Snapshot("subdir/t", "f.root")` write TTree `t` in the
1019 /// sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
1020 ///
1021 /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
1022 /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled with
1023 /// respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in wrong
1024 /// associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
1025 /// error out if such a "shuffled" TTree is used in a friendship.
1026 ///
1027 /// \note In case no events are written out (e.g. because no event passes all filters) the behavior of Snapshot in
1028 /// single-thread and multi-thread runs is different: in single-thread runs, Snapshot will write out a TTree with
1029 /// the specified name and zero entries; in multi-thread runs, no TTree object will be written out to disk.
1030 ///
1031 /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
1032 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1033 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1034 /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
1035 ///
1036 /// ### Example invocations:
1037 ///
1038 /// ~~~{.cpp}
1039 /// // without specifying template parameters (column types automatically deduced)
1040 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
1041 ///
1042 /// // specifying template parameters ("x" is `int`, "y" is `float`)
1043 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
1044 /// ~~~
1045 ///
1046 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
1047 /// `RSnapshotOptions`:
1048 /// ~~~{.cpp}
1049 /// RSnapshotOptions opts;
1050 /// opts.fLazy = true;
1051 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
1052 /// ~~~
1053 template <typename... ColumnTypes>
1056 const RSnapshotOptions &options = RSnapshotOptions())
1057 {
1058 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
1059 }
1060
1061 ////////////////////////////////////////////////////////////////////////////
1062 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1063 /// \param[in] treename The name of the output TTree.
1064 /// \param[in] filename The name of the output TFile.
1065 /// \param[in] columnList The list of names of the columns/branches to be written.
1066 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1067 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1068 ///
1069 /// This function returns a `RDataFrame` built with the output tree as a source.
1070 /// The types of the columns are automatically inferred and do not need to be specified.
1071 ///
1072 /// See above for a more complete description and example usages.
1074 const ColumnNames_t &columnList,
1075 const RSnapshotOptions &options = RSnapshotOptions())
1076 {
1077 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
1078 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
1080
1081 const auto fullTreeName = treename;
1082 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
1083 treename = parsedTreePath.fTreeName;
1084 const auto &dirname = parsedTreePath.fDirName;
1085
1086 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
1087 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
1088
1090 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, validCols);
1091
1092 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
1093 validCols, newRDF, snapHelperArgs, validCols.size());
1094
1095 if (!options.fLazy)
1096 *resPtr;
1097 return resPtr;
1098 }
1099
1100 // clang-format off
1101 ////////////////////////////////////////////////////////////////////////////
1102 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1103 /// \param[in] treename The name of the output TTree.
1104 /// \param[in] filename The name of the output TFile.
1105 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1106 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
1107 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1108 ///
1109 /// This function returns a `RDataFrame` built with the output tree as a source.
1110 /// The types of the columns are automatically inferred and do not need to be specified.
1111 ///
1112 /// See above for a more complete description and example usages.
1114 std::string_view columnNameRegexp = "",
1115 const RSnapshotOptions &options = RSnapshotOptions())
1116 {
1117 const auto definedColumns = fColRegister.GetNames();
1118 auto *tree = fLoopManager->GetTree();
1119 const auto treeBranchNames = tree != nullptr ? RDFInternal::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1120 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1121 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1122 ColumnNames_t dsColumnsWithoutSizeColumns;
1123 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1124 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1125 ColumnNames_t columnNames;
1126 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
1127 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1128 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1129 columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
1130
1131 // De-duplicate column names. Currently the only way this can happen is if a column coming from a tree or
1132 // data-source is Redefine'd.
1133 std::set<std::string> uniqueCols(columnNames.begin(), columnNames.end());
1134 columnNames.assign(uniqueCols.begin(), uniqueCols.end());
1135
1136 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
1137 return Snapshot(treename, filename, selectedColumns, options);
1138 }
1139 // clang-format on
1140
1141 // clang-format off
1142 ////////////////////////////////////////////////////////////////////////////
1143 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1144 /// \param[in] treename The name of the output TTree.
1145 /// \param[in] filename The name of the output TFile.
1146 /// \param[in] columnList The list of names of the columns/branches to be written.
1147 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1148 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1149 ///
1150 /// This function returns a `RDataFrame` built with the output tree as a source.
1151 /// The types of the columns are automatically inferred and do not need to be specified.
1152 ///
1153 /// See above for a more complete description and example usages.
1155 std::initializer_list<std::string> columnList,
1156 const RSnapshotOptions &options = RSnapshotOptions())
1157 {
1158 ColumnNames_t selectedColumns(columnList);
1159 return Snapshot(treename, filename, selectedColumns, options);
1160 }
1161 // clang-format on
1162
1163 ////////////////////////////////////////////////////////////////////////////
1164 /// \brief Save selected columns in memory.
1165 /// \tparam ColumnTypes variadic list of branch/column types.
1166 /// \param[in] columnList columns to be cached in memory.
1167 /// \return a `RDataFrame` that wraps the cached dataset.
1168 ///
1169 /// This action returns a new `RDataFrame` object, completely detached from
1170 /// the originating `RDataFrame`. The new dataframe only contains the cached
1171 /// columns and stores their content in memory for fast, zero-copy subsequent access.
1172 ///
1173 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
1174 /// fits in memory and that will be accessed many times.
1175 ///
1176 /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
1177 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1178 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1179 /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
1180 ///
1181 /// ### Example usage:
1182 ///
1183 /// **Types and columns specified:**
1184 /// ~~~{.cpp}
1185 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
1186 /// ~~~
1187 ///
1188 /// **Types inferred and columns specified (this invocation relies on jitting):**
1189 /// ~~~{.cpp}
1190 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
1191 /// ~~~
1192 ///
1193 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
1194 /// ~~~{.cpp}
1195 /// auto cache_all_cols_df = df.Cache(myRegexp);
1196 /// ~~~
1197 template <typename... ColumnTypes>
1199 {
1200 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
1201 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
1202 }
1203
1204 ////////////////////////////////////////////////////////////////////////////
1205 /// \brief Save selected columns in memory.
1206 /// \param[in] columnList columns to be cached in memory
1207 /// \return a `RDataFrame` that wraps the cached dataset.
1208 ///
1209 /// See the previous overloads for more information.
1211 {
1212 // Early return: if the list of columns is empty, just return an empty RDF
1213 // If we proceed, the jitted call will not compile!
1214 if (columnList.empty()) {
1215 auto nEntries = *this->Count();
1216 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
1217 return emptyRDF;
1218 }
1219
1220 std::stringstream cacheCall;
1221 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
1222 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
1224 // build a string equivalent to
1225 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
1226 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
1227 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
1229 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1230 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
1231
1232 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
1233
1234 const auto validColumnNames =
1235 GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
1236 const auto colTypes = GetValidatedArgTypes(validColumnNames, fColRegister, fLoopManager->GetTree(), fDataSource,
1237 "Cache", /*vector2rvec=*/false);
1238 for (const auto &colType : colTypes)
1239 cacheCall << colType << ", ";
1240 if (!columnListWithoutSizeColumns.empty())
1241 cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
1242 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
1243 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
1244
1245 // book the code to jit with the RLoopManager and trigger the event loop
1246 fLoopManager->ToJitExec(cacheCall.str());
1247 fLoopManager->Jit();
1248
1249 return resRDF;
1250 }
1251
1252 ////////////////////////////////////////////////////////////////////////////
1253 /// \brief Save selected columns in memory.
1254 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1255 /// \return a `RDataFrame` that wraps the cached dataset.
1256 ///
1257 /// The existing columns are matched against the regular expression. If the string provided
1258 /// is empty, all columns are selected. See the previous overloads for more information.
1260 {
1261 const auto definedColumns = fColRegister.GetNames();
1262 auto *tree = fLoopManager->GetTree();
1263 const auto treeBranchNames = tree != nullptr ? RDFInternal::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1264 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1265 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1266 ColumnNames_t dsColumnsWithoutSizeColumns;
1267 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1268 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1269 ColumnNames_t columnNames;
1270 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
1271 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1272 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1273 columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
1274 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
1275 return Cache(selectedColumns);
1276 }
1277
1278 ////////////////////////////////////////////////////////////////////////////
1279 /// \brief Save selected columns in memory.
1280 /// \param[in] columnList columns to be cached in memory.
1281 /// \return a `RDataFrame` that wraps the cached dataset.
1282 ///
1283 /// See the previous overloads for more information.
1284 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
1285 {
1286 ColumnNames_t selectedColumns(columnList);
1287 return Cache(selectedColumns);
1288 }
1289
1290 // clang-format off
1291 ////////////////////////////////////////////////////////////////////////////
1292 /// \brief Creates a node that filters entries based on range: [begin, end).
1293 /// \param[in] begin Initial entry number considered for this range.
1294 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1295 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
1296 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
1297 ///
1298 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
1299 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
1300 ///
1301 /// ### Example usage:
1302 /// ~~~{.cpp}
1303 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
1304 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
1305 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
1306 /// ~~~
1307 // clang-format on
1308 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
1309 {
1310 // check invariants
1311 if (stride == 0 || (end != 0 && end < begin))
1312 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
1313 CheckIMTDisabled("Range");
1314
1316 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
1317 fLoopManager->Book(rangePtr.get());
1319 return tdf_r;
1320 }
1321
1322 // clang-format off
1323 ////////////////////////////////////////////////////////////////////////////
1324 /// \brief Creates a node that filters entries based on range.
1325 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1326 /// \return a node of the computation graph for which the range is defined.
1327 ///
1328 /// See the other Range overload for a detailed description.
1329 // clang-format on
1330 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
1331
1332 // clang-format off
1333 ////////////////////////////////////////////////////////////////////////////
1334 /// \brief Execute a user-defined function on each entry (*instant action*).
1335 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1336 /// \param[in] columns Names of the columns/branches in input to the user function.
1337 ///
1338 /// The callable `f` is invoked once per entry. This is an *instant action*:
1339 /// upon invocation, an event loop as well as execution of all scheduled actions
1340 /// is triggered.
1341 /// Users are responsible for the thread-safety of this callable when executing
1342 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
1343 ///
1344 /// ### Example usage:
1345 /// ~~~{.cpp}
1346 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
1347 /// ~~~
1348 // clang-format on
1349 template <typename F>
1350 void Foreach(F f, const ColumnNames_t &columns = {})
1351 {
1352 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
1353 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
1354 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
1355 }
1356
1357 // clang-format off
1358 ////////////////////////////////////////////////////////////////////////////
1359 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
1360 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1361 /// \param[in] columns Names of the columns/branches in input to the user function.
1362 ///
1363 /// Same as `Foreach`, but the user-defined function takes an extra
1364 /// `unsigned int` as its first parameter, the *processing slot index*.
1365 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
1366 /// for each thread of execution.
1367 /// This is meant as a helper in writing thread-safe `Foreach`
1368 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1369 /// The user-defined processing callable is able to follow different
1370 /// *streams of processing* indexed by the first parameter.
1371 /// `ForeachSlot` works just as well with single-thread execution: in that
1372 /// case `slot` will always be `0`.
1373 ///
1374 /// ### Example usage:
1375 /// ~~~{.cpp}
1376 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1377 /// ~~~
1378 // clang-format on
1379 template <typename F>
1380 void ForeachSlot(F f, const ColumnNames_t &columns = {})
1381 {
1382 using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
1383 constexpr auto nColumns = ColTypes_t::list_size;
1384
1385 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1386 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
1387
1388 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1390
1391 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister);
1392 fLoopManager->Book(action.get());
1393
1394 fLoopManager->Run();
1395 }
1396
1397 // clang-format off
1398 ////////////////////////////////////////////////////////////////////////////
1399 /// \brief Execute a user-defined reduce operation on the values of a column.
1400 /// \tparam F The type of the reduce callable. Automatically deduced.
1401 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1402 /// \param[in] f A callable with signature `T(T,T)`
1403 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1404 /// \return the reduced quantity wrapped in a ROOT::RDF:RResultPtr.
1405 ///
1406 /// A reduction takes two values of a column and merges them into one (e.g.
1407 /// by summing them, taking the maximum, etc). This action performs the
1408 /// specified reduction operation on all processed column values, returning
1409 /// a single value of the same type. The callable f must satisfy the general
1410 /// requirements of a *processing function* besides having signature `T(T,T)`
1411 /// where `T` is the type of column columnName.
1412 ///
1413 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1414 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1415 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1416 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1417 /// overload.
1418 ///
1419 /// ### Example usage:
1420 /// ~~~{.cpp}
1421 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1422 /// ~~~
1423 ///
1424 /// This action is *lazy*: upon invocation of this method the calculation is
1425 /// booked but not executed. Also see RResultPtr.
1426 // clang-format on
1427 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1429 {
1430 static_assert(
1432 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
1433 return Reduce(std::move(f), columnName, T());
1434 }
1435
1436 ////////////////////////////////////////////////////////////////////////////
1437 /// \brief Execute a user-defined reduce operation on the values of a column.
1438 /// \tparam F The type of the reduce callable. Automatically deduced.
1439 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1440 /// \param[in] f A callable with signature `T(T,T)`
1441 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1442 /// \param[in] redIdentity The reduced object of each thread is initialized to this value.
1443 /// \return the reduced quantity wrapped in a RResultPtr.
1444 ///
1445 /// ### Example usage:
1446 /// ~~~{.cpp}
1447 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1448 /// ~~~
1449 /// See the description of the first Reduce overload for more information.
1450 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1451 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1452 {
1453 return Aggregate(f, f, columnName, redIdentity);
1454 }
1455
1456 ////////////////////////////////////////////////////////////////////////////
1457 /// \brief Return the number of entries processed (*lazy action*).
1458 /// \return the number of entries wrapped in a RResultPtr.
1459 ///
1460 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1461 /// This action is *lazy*: upon invocation of this method the calculation is
1462 /// booked but not executed. Also see RResultPtr.
1463 ///
1464 /// ### Example usage:
1465 /// ~~~{.cpp}
1466 /// auto nEntriesAfterCuts = myFilteredDf.Count();
1467 /// ~~~
1468 ///
1470 {
1471 const auto nSlots = fLoopManager->GetNSlots();
1472 auto cSPtr = std::make_shared<ULong64_t>(0);
1473 using Helper_t = RDFInternal::CountHelper;
1475 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
1477 fLoopManager->Book(action.get());
1478 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1479 }
1480
1481 ////////////////////////////////////////////////////////////////////////////
1482 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1483 /// \tparam T The type of the column.
1484 /// \tparam COLL The type of collection used to store the values.
1485 /// \param[in] column The name of the column to collect the values of.
1486 /// \return the content of the selected column wrapped in a RResultPtr.
1487 ///
1488 /// The collection type to be specified for C-style array columns is `RVec<T>`:
1489 /// in this case the returned collection is a `std::vector<RVec<T>>`.
1490 /// ### Example usage:
1491 /// ~~~{.cpp}
1492 /// // In this case intCol is a std::vector<int>
1493 /// auto intCol = rdf.Take<int>("integerColumn");
1494 /// // Same content as above but in this case taken as a RVec<int>
1495 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1496 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1497 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1498 /// ~~~
1499 /// This action is *lazy*: upon invocation of this method the calculation is
1500 /// booked but not executed. Also see RResultPtr.
1501 template <typename T, typename COLL = std::vector<T>>
1503 {
1504 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1505
1506 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1507 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1508
1509 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
1511 auto valuesPtr = std::make_shared<COLL>();
1512 const auto nSlots = fLoopManager->GetNSlots();
1513
1514 auto action =
1515 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister);
1516 fLoopManager->Book(action.get());
1517 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1518 }
1519
1520 ////////////////////////////////////////////////////////////////////////////
1521 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1522 /// \tparam V The type of the column used to fill the histogram.
1523 /// \param[in] model The returned histogram will be constructed using this as a model.
1524 /// \param[in] vName The name of the column that will fill the histogram.
1525 /// \return the monodimensional histogram wrapped in a RResultPtr.
1526 ///
1527 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1528 /// is filled with each one of the elements of the container. In case multiple columns of container type
1529 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1530 /// possibly different lengths between events).
1531 /// This action is *lazy*: upon invocation of this method the calculation is
1532 /// booked but not executed. Also see RResultPtr.
1533 ///
1534 /// ### Example usage:
1535 /// ~~~{.cpp}
1536 /// // Deduce column type (this invocation needs jitting internally)
1537 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1538 /// // Explicit column type
1539 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1540 /// ~~~
1541 ///
1542 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1543 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1544 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1545 template <typename V = RDFDetail::RInferredType>
1546 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1547 {
1548 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1549
1550 const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1551
1552 std::shared_ptr<::TH1D> h(nullptr);
1553 {
1554 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1555 h = model.GetHistogram();
1556 h->SetDirectory(nullptr);
1557 }
1558
1559 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1560 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
1561 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h);
1562 }
1563
1564 ////////////////////////////////////////////////////////////////////////////
1565 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1566 /// \tparam V The type of the column used to fill the histogram.
1567 /// \param[in] vName The name of the column that will fill the histogram.
1568 /// \return the monodimensional histogram wrapped in a RResultPtr.
1569 ///
1570 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1571 /// The "name" and "title" strings are built starting from the input column name.
1572 /// See the description of the first Histo1D() overload for more details.
1573 ///
1574 /// ### Example usage:
1575 /// ~~~{.cpp}
1576 /// // Deduce column type (this invocation needs jitting internally)
1577 /// auto myHist1 = myDf.Histo1D("myColumn");
1578 /// // Explicit column type
1579 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1580 /// ~~~
1581 template <typename V = RDFDetail::RInferredType>
1583 {
1584 const auto h_name = std::string(vName);
1585 const auto h_title = h_name + ";" + h_name + ";count";
1586 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1587 }
1588
1589 ////////////////////////////////////////////////////////////////////////////
1590 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1591 /// \tparam V The type of the column used to fill the histogram.
1592 /// \tparam W The type of the column used as weights.
1593 /// \param[in] model The returned histogram will be constructed using this as a model.
1594 /// \param[in] vName The name of the column that will fill the histogram.
1595 /// \param[in] wName The name of the column that will provide the weights.
1596 /// \return the monodimensional histogram wrapped in a RResultPtr.
1597 ///
1598 /// See the description of the first Histo1D() overload for more details.
1599 ///
1600 /// ### Example usage:
1601 /// ~~~{.cpp}
1602 /// // Deduce column type (this invocation needs jitting internally)
1603 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1604 /// // Explicit column type
1605 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1606 /// ~~~
1607 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1609 {
1610 const std::vector<std::string_view> columnViews = {vName, wName};
1611 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1612 ? ColumnNames_t()
1613 : ColumnNames_t(columnViews.begin(), columnViews.end());
1614 std::shared_ptr<::TH1D> h(nullptr);
1615 {
1616 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1617 h = model.GetHistogram();
1618 }
1619 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h);
1620 }
1621
1622 ////////////////////////////////////////////////////////////////////////////
1623 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1624 /// \tparam V The type of the column used to fill the histogram.
1625 /// \tparam W The type of the column used as weights.
1626 /// \param[in] vName The name of the column that will fill the histogram.
1627 /// \param[in] wName The name of the column that will provide the weights.
1628 /// \return the monodimensional histogram wrapped in a RResultPtr.
1629 ///
1630 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1631 /// The "name" and "title" strings are built starting from the input column names.
1632 /// See the description of the first Histo1D() overload for more details.
1633 ///
1634 /// ### Example usage:
1635 /// ~~~{.cpp}
1636 /// // Deduce column types (this invocation needs jitting internally)
1637 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1638 /// // Explicit column types
1639 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1640 /// ~~~
1641 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1643 {
1644 // We build name and title based on the value and weight column names
1645 std::string str_vName{vName};
1646 std::string str_wName{wName};
1647 const auto h_name = str_vName + "_weighted_" + str_wName;
1648 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1649 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1650 }
1651
1652 ////////////////////////////////////////////////////////////////////////////
1653 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1654 /// \tparam V The type of the column used to fill the histogram.
1655 /// \tparam W The type of the column used as weights.
1656 /// \param[in] model The returned histogram will be constructed using this as a model.
1657 /// \return the monodimensional histogram wrapped in a RResultPtr.
1658 ///
1659 /// This overload will use the first two default columns as column names.
1660 /// See the description of the first Histo1D() overload for more details.
1661 template <typename V, typename W>
1662 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1663 {
1664 return Histo1D<V, W>(model, "", "");
1665 }
1666
1667 ////////////////////////////////////////////////////////////////////////////
1668 /// \brief Fill and return a two-dimensional histogram (*lazy action*).
1669 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1670 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1671 /// \param[in] model The returned histogram will be constructed using this as a model.
1672 /// \param[in] v1Name The name of the column that will fill the x axis.
1673 /// \param[in] v2Name The name of the column that will fill the y axis.
1674 /// \return the bidimensional histogram wrapped in a RResultPtr.
1675 ///
1676 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1677 /// is filled with each one of the elements of the container. In case multiple columns of container type
1678 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1679 /// possibly different lengths between events).
1680 /// This action is *lazy*: upon invocation of this method the calculation is
1681 /// booked but not executed. Also see RResultPtr.
1682 ///
1683 /// ### Example usage:
1684 /// ~~~{.cpp}
1685 /// // Deduce column types (this invocation needs jitting internally)
1686 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1687 /// // Explicit column types
1688 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1689 /// ~~~
1690 ///
1691 ///
1692 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1693 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1694 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1695 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1697 {
1698 std::shared_ptr<::TH2D> h(nullptr);
1699 {
1700 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1701 h = model.GetHistogram();
1702 }
1703 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1704 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1705 }
1706 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1707 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1708 ? ColumnNames_t()
1709 : ColumnNames_t(columnViews.begin(), columnViews.end());
1710 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h);
1711 }
1712
1713 ////////////////////////////////////////////////////////////////////////////
1714 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
1715 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1716 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1717 /// \tparam W The type of the column used for the weights of the histogram.
1718 /// \param[in] model The returned histogram will be constructed using this as a model.
1719 /// \param[in] v1Name The name of the column that will fill the x axis.
1720 /// \param[in] v2Name The name of the column that will fill the y axis.
1721 /// \param[in] wName The name of the column that will provide the weights.
1722 /// \return the bidimensional histogram wrapped in a RResultPtr.
1723 ///
1724 /// This action is *lazy*: upon invocation of this method the calculation is
1725 /// booked but not executed. Also see RResultPtr.
1726 ///
1727 /// ### Example usage:
1728 /// ~~~{.cpp}
1729 /// // Deduce column types (this invocation needs jitting internally)
1730 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1731 /// // Explicit column types
1732 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1733 /// ~~~
1734 ///
1735 /// See the documentation of the first Histo2D() overload for more details.
1736 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1737 typename W = RDFDetail::RInferredType>
1740 {
1741 std::shared_ptr<::TH2D> h(nullptr);
1742 {
1743 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1744 h = model.GetHistogram();
1745 }
1746 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1747 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1748 }
1749 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1750 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1751 ? ColumnNames_t()
1752 : ColumnNames_t(columnViews.begin(), columnViews.end());
1753 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h);
1754 }
1755
1756 template <typename V1, typename V2, typename W>
1758 {
1759 return Histo2D<V1, V2, W>(model, "", "", "");
1760 }
1761
1762 ////////////////////////////////////////////////////////////////////////////
1763 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1764 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1765 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1766 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1767 /// \param[in] model The returned histogram will be constructed using this as a model.
1768 /// \param[in] v1Name The name of the column that will fill the x axis.
1769 /// \param[in] v2Name The name of the column that will fill the y axis.
1770 /// \param[in] v3Name The name of the column that will fill the z axis.
1771 /// \return the tridimensional histogram wrapped in a RResultPtr.
1772 ///
1773 /// This action is *lazy*: upon invocation of this method the calculation is
1774 /// booked but not executed. Also see RResultPtr.
1775 ///
1776 /// ### Example usage:
1777 /// ~~~{.cpp}
1778 /// // Deduce column types (this invocation needs jitting internally)
1779 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1780 /// "myValueX", "myValueY", "myValueZ");
1781 /// // Explicit column types
1782 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1783 /// "myValueX", "myValueY", "myValueZ");
1784 /// ~~~
1785 ///
1786 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1787 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1788 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1789 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1790 typename V3 = RDFDetail::RInferredType>
1792 std::string_view v3Name = "")
1793 {
1794 std::shared_ptr<::TH3D> h(nullptr);
1795 {
1796 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1797 h = model.GetHistogram();
1798 }
1799 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1800 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1801 }
1802 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1803 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1804 ? ColumnNames_t()
1805 : ColumnNames_t(columnViews.begin(), columnViews.end());
1806 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h);
1807 }
1808
1809 ////////////////////////////////////////////////////////////////////////////
1810 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1811 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1812 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1813 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1814 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1815 /// \param[in] model The returned histogram will be constructed using this as a model.
1816 /// \param[in] v1Name The name of the column that will fill the x axis.
1817 /// \param[in] v2Name The name of the column that will fill the y axis.
1818 /// \param[in] v3Name The name of the column that will fill the z axis.
1819 /// \param[in] wName The name of the column that will provide the weights.
1820 /// \return the tridimensional histogram wrapped in a RResultPtr.
1821 ///
1822 /// This action is *lazy*: upon invocation of this method the calculation is
1823 /// booked but not executed. Also see RResultPtr.
1824 ///
1825 /// ### Example usage:
1826 /// ~~~{.cpp}
1827 /// // Deduce column types (this invocation needs jitting internally)
1828 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1829 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1830 /// // Explicit column types
1831 /// using d_t = double;
1832 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1833 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1834 /// ~~~
1835 ///
1836 ///
1837 /// See the documentation of the first Histo2D() overload for more details.
1838 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1839 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1841 std::string_view v3Name, std::string_view wName)
1842 {
1843 std::shared_ptr<::TH3D> h(nullptr);
1844 {
1845 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1846 h = model.GetHistogram();
1847 }
1848 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1849 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1850 }
1851 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1852 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1853 ? ColumnNames_t()
1854 : ColumnNames_t(columnViews.begin(), columnViews.end());
1855 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h);
1856 }
1857
1858 template <typename V1, typename V2, typename V3, typename W>
1860 {
1861 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1862 }
1863
1864 ////////////////////////////////////////////////////////////////////////////
1865 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1866 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
1867 /// present.
1868 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
1869 /// object.
1870 /// \param[in] model The returned histogram will be constructed using this as a model.
1871 /// \param[in] columnList
1872 /// A list containing the names of the columns that will be passed when calling `Fill`.
1873 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
1874 /// \return the N-dimensional histogram wrapped in a RResultPtr.
1875 ///
1876 /// This action is *lazy*: upon invocation of this method the calculation is
1877 /// booked but not executed. See RResultPtr documentation.
1878 ///
1879 /// ### Example usage:
1880 /// ~~~{.cpp}
1881 /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
1882 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
1883 /// {"col0", "col1", "col2", "col3"});
1884 /// ~~~
1885 ///
1886 template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
1887 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
1888 {
1889 std::shared_ptr<::THnD> h(nullptr);
1890 {
1891 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1892 h = model.GetHistogram();
1893
1894 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
1895 h->Sumw2();
1896 } else if (int(columnList.size()) != h->GetNdimensions()) {
1897 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
1898 }
1899 }
1900 return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h);
1901 }
1902
1903 ////////////////////////////////////////////////////////////////////////////
1904 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1905 /// \param[in] model The returned histogram will be constructed using this as a model.
1906 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1907 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
1908 /// \return the N-dimensional histogram wrapped in a RResultPtr.
1909 ///
1910 /// This action is *lazy*: upon invocation of this method the calculation is
1911 /// booked but not executed. Also see RResultPtr.
1912 ///
1913 /// ### Example usage:
1914 /// ~~~{.cpp}
1915 /// auto myFilledObj = myDf.HistoND({"name","title", 4,
1916 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
1917 /// {"col0", "col1", "col2", "col3"});
1918 /// ~~~
1919 ///
1920 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
1921 {
1922 std::shared_ptr<::THnD> h(nullptr);
1923 {
1924 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1925 h = model.GetHistogram();
1926
1927 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
1928 h->Sumw2();
1929 } else if (int(columnList.size()) != h->GetNdimensions()) {
1930 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
1931 }
1932 }
1933 return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h,
1934 columnList.size());
1935 }
1936
1937 ////////////////////////////////////////////////////////////////////////////
1938 /// \brief Fill and return a TGraph object (*lazy action*).
1939 /// \tparam X The type of the column used to fill the x axis.
1940 /// \tparam Y The type of the column used to fill the y axis.
1941 /// \param[in] x The name of the column that will fill the x axis.
1942 /// \param[in] y The name of the column that will fill the y axis.
1943 /// \return the TGraph wrapped in a RResultPtr.
1944 ///
1945 /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph
1946 /// is filled with each one of the elements of the container.
1947 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1948 /// If the Graph has to be drawn, it is suggested to the user to sort it on the x before printing.
1949 /// A name and a title to the TGraph is given based on the input column names.
1950 ///
1951 /// This action is *lazy*: upon invocation of this method the calculation is
1952 /// booked but not executed. Also see RResultPtr.
1953 ///
1954 /// ### Example usage:
1955 /// ~~~{.cpp}
1956 /// // Deduce column types (this invocation needs jitting internally)
1957 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1958 /// // Explicit column types
1959 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1960 /// ~~~
1961 ///
1962 /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory
1963 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1964 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1965 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
1967 {
1968 auto graph = std::make_shared<::TGraph>();
1969 const std::vector<std::string_view> columnViews = {x, y};
1970 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1971 ? ColumnNames_t()
1972 : ColumnNames_t(columnViews.begin(), columnViews.end());
1973
1974 const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1975
1976 // We build a default name and title based on the input columns
1977 const auto g_name = validatedColumns[0] + "_vs_" + validatedColumns[1];
1978 const auto g_title = validatedColumns[0] + " vs " + validatedColumns[1];
1979 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1980 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
1981 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
1982
1983 return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph);
1984 }
1985
1986 ////////////////////////////////////////////////////////////////////////////
1987 /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*).
1988 /// \param[in] x The name of the column that will fill the x axis.
1989 /// \param[in] y The name of the column that will fill the y axis.
1990 /// \param[in] exl The name of the column of X low errors
1991 /// \param[in] exh The name of the column of X high errors
1992 /// \param[in] eyl The name of the column of Y low errors
1993 /// \param[in] eyh The name of the column of Y high errors
1994 /// \return the TGraphAsymmErrors wrapped in a RResultPtr.
1995 ///
1996 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1997 /// is filled with each one of the elements of the container.
1998 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1999 ///
2000 /// This action is *lazy*: upon invocation of this method the calculation is
2001 /// booked but not executed. Also see RResultPtr.
2002 ///
2003 /// ### Example usage:
2004 /// ~~~{.cpp}
2005 /// // Deduce column types (this invocation needs jitting internally)
2006 /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2007 /// // Explicit column types
2008 /// using f = float
2009 /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2010 /// ~~~
2011 ///
2012 /// \note Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory
2013 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2014 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2015 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType,
2016 typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType,
2017 typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType>
2020 std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "")
2021 {
2022 auto graph = std::make_shared<::TGraphAsymmErrors>();
2023 const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh};
2024 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2025 ? ColumnNames_t()
2026 : ColumnNames_t(columnViews.begin(), columnViews.end());
2027
2028 const auto validatedColumns = GetValidatedColumnNames(6, userColumns);
2029
2030 // We build a default name and title based on the input columns
2031 const auto g_name = validatedColumns[0] + "_vs_" + validatedColumns[1];
2032 const auto g_title = validatedColumns[0] + " vs " + validatedColumns[1];
2033 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2034 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2035 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2036
2037 return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph,
2038 graph);
2039 }
2040
2041 ////////////////////////////////////////////////////////////////////////////
2042 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2043 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2044 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2045 /// \param[in] model The model to be considered to build the new return value.
2046 /// \param[in] v1Name The name of the column that will fill the x axis.
2047 /// \param[in] v2Name The name of the column that will fill the y axis.
2048 /// \return the monodimensional profile wrapped in a RResultPtr.
2049 ///
2050 /// This action is *lazy*: upon invocation of this method the calculation is
2051 /// booked but not executed. Also see RResultPtr.
2052 ///
2053 /// ### Example usage:
2054 /// ~~~{.cpp}
2055 /// // Deduce column types (this invocation needs jitting internally)
2056 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2057 /// // Explicit column types
2058 /// auto myProf2 = myDf.Graph<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2059 /// ~~~
2060 ///
2061 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2062 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2063 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2064 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2066 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2067 {
2068 std::shared_ptr<::TProfile> h(nullptr);
2069 {
2070 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2071 h = model.GetProfile();
2072 }
2073
2074 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2075 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
2076 }
2077 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2078 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2079 ? ColumnNames_t()
2080 : ColumnNames_t(columnViews.begin(), columnViews.end());
2081 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h);
2082 }
2083
2084 ////////////////////////////////////////////////////////////////////////////
2085 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2086 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2087 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2088 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
2089 /// \param[in] model The model to be considered to build the new return value.
2090 /// \param[in] v1Name The name of the column that will fill the x axis.
2091 /// \param[in] v2Name The name of the column that will fill the y axis.
2092 /// \param[in] wName The name of the column that will provide the weights.
2093 /// \return the monodimensional profile wrapped in a RResultPtr.
2094 ///
2095 /// This action is *lazy*: upon invocation of this method the calculation is
2096 /// booked but not executed. Also see RResultPtr.
2097 ///
2098 /// ### Example usage:
2099 /// ~~~{.cpp}
2100 /// // Deduce column types (this invocation needs jitting internally)
2101 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
2102 /// // Explicit column types
2103 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
2104 /// "xValues", "yValues", "weight");
2105 /// ~~~
2106 ///
2107 /// See the first Profile1D() overload for more details.
2108 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2109 typename W = RDFDetail::RInferredType>
2112 {
2113 std::shared_ptr<::TProfile> h(nullptr);
2114 {
2115 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2116 h = model.GetProfile();
2117 }
2118
2119 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2120 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
2121 }
2122 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2123 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2124 ? ColumnNames_t()
2125 : ColumnNames_t(columnViews.begin(), columnViews.end());
2126 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h);
2127 }
2128
2129 ////////////////////////////////////////////////////////////////////////////
2130 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2131 /// See the first Profile1D() overload for more details.
2132 template <typename V1, typename V2, typename W>
2134 {
2135 return Profile1D<V1, V2, W>(model, "", "", "");
2136 }
2137
2138 ////////////////////////////////////////////////////////////////////////////
2139 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2140 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2141 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2142 /// \tparam V2 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2143 /// \param[in] model The returned profile will be constructed using this as a model.
2144 /// \param[in] v1Name The name of the column that will fill the x axis.
2145 /// \param[in] v2Name The name of the column that will fill the y axis.
2146 /// \param[in] v3Name The name of the column that will fill the z axis.
2147 /// \return the bidimensional profile wrapped in a RResultPtr.
2148 ///
2149 /// This action is *lazy*: upon invocation of this method the calculation is
2150 /// booked but not executed. Also see RResultPtr.
2151 ///
2152 /// ### Example usage:
2153 /// ~~~{.cpp}
2154 /// // Deduce column types (this invocation needs jitting internally)
2155 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2156 /// "xValues", "yValues", "zValues");
2157 /// // Explicit column types
2158 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2159 /// "xValues", "yValues", "zValues");
2160 /// ~~~
2161 ///
2162 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2163 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2164 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2165 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2166 typename V3 = RDFDetail::RInferredType>
2168 std::string_view v2Name = "", std::string_view v3Name = "")
2169 {
2170 std::shared_ptr<::TProfile2D> h(nullptr);
2171 {
2172 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2173 h = model.GetProfile();
2174 }
2175
2176 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2177 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2178 }
2179 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2180 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2181 ? ColumnNames_t()
2182 : ColumnNames_t(columnViews.begin(), columnViews.end());
2183 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h);
2184 }
2185
2186 ////////////////////////////////////////////////////////////////////////////
2187 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2188 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2189 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2190 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2191 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2192 /// \param[in] model The returned histogram will be constructed using this as a model.
2193 /// \param[in] v1Name The name of the column that will fill the x axis.
2194 /// \param[in] v2Name The name of the column that will fill the y axis.
2195 /// \param[in] v3Name The name of the column that will fill the z axis.
2196 /// \param[in] wName The name of the column that will provide the weights.
2197 /// \return the bidimensional profile wrapped in a RResultPtr.
2198 ///
2199 /// This action is *lazy*: upon invocation of this method the calculation is
2200 /// booked but not executed. Also see RResultPtr.
2201 ///
2202 /// ### Example usage:
2203 /// ~~~{.cpp}
2204 /// // Deduce column types (this invocation needs jitting internally)
2205 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2206 /// "xValues", "yValues", "zValues", "weight");
2207 /// // Explicit column types
2208 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2209 /// "xValues", "yValues", "zValues", "weight");
2210 /// ~~~
2211 ///
2212 /// See the first Profile2D() overload for more details.
2213 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2214 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2216 std::string_view v3Name, std::string_view wName)
2217 {
2218 std::shared_ptr<::TProfile2D> h(nullptr);
2219 {
2220 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2221 h = model.GetProfile();
2222 }
2223
2224 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2225 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2226 }
2227 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2228 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2229 ? ColumnNames_t()
2230 : ColumnNames_t(columnViews.begin(), columnViews.end());
2231 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h);
2232 }
2233
2234 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2235 /// See the first Profile2D() overload for more details.
2236 template <typename V1, typename V2, typename V3, typename W>
2238 {
2239 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
2240 }
2241
2242 ////////////////////////////////////////////////////////////////////////////
2243 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
2244 ///
2245 /// Type T must provide at least:
2246 /// - a copy-constructor
2247 /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
2248 /// (these types can also be passed as template parameters to this method)
2249 /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
2250 /// objects passed as argument into the object on which `Merge` was called (an analogous of TH1::Merge). Note that
2251 /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
2252 /// the TCollection*).
2253 ///
2254 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
2255 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
2256 /// \tparam T The type of the object to fill. Automatically deduced.
2257 /// \param[in] model The model to be considered to build the new return value.
2258 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2259 /// \return the filled object wrapped in a RResultPtr.
2260 ///
2261 /// The user gives up ownership of the model object.
2262 /// The list of column names to be used for filling must always be specified.
2263 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
2264 /// Also see RResultPtr.
2265 ///
2266 /// ### Example usage:
2267 /// ~~~{.cpp}
2268 /// MyClass obj;
2269 /// // Deduce column types (this invocation needs jitting internally, and in this case
2270 /// // MyClass needs to be known to the interpreter)
2271 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
2272 /// // explicit column types
2273 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
2274 /// ~~~
2275 ///
2276 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
2277 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
2278 {
2279 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
2280 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
2281 throw std::runtime_error("The absence of axes limits is not supported yet.");
2282 }
2283 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h,
2284 columnList.size());
2285 }
2286
2287 ////////////////////////////////////////////////////////////////////////////
2288 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2289 ///
2290 /// \tparam V The type of the value column
2291 /// \param[in] value The name of the column with the values to fill the statistics with.
2292 /// \return the filled TStatistic object wrapped in a RResultPtr.
2293 ///
2294 /// ### Example usage:
2295 /// ~~~{.cpp}
2296 /// // Deduce column type (this invocation needs jitting internally)
2297 /// auto stats0 = myDf.Stats("values");
2298 /// // Explicit column type
2299 /// auto stats1 = myDf.Stats<float>("values");
2300 /// ~~~
2301 ///
2302 template <typename V = RDFDetail::RInferredType>
2304 {
2305 ColumnNames_t columns;
2306 if (!value.empty()) {
2307 columns.emplace_back(std::string(value));
2308 }
2309 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2311 return Fill(TStatistic(), validColumnNames);
2312 } else {
2313 return Fill<V>(TStatistic(), validColumnNames);
2314 }
2315 }
2316
2317 ////////////////////////////////////////////////////////////////////////////
2318 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2319 ///
2320 /// \tparam V The type of the value column
2321 /// \tparam W The type of the weight column
2322 /// \param[in] value The name of the column with the values to fill the statistics with.
2323 /// \param[in] weight The name of the column with the weights to fill the statistics with.
2324 /// \return the filled TStatistic object wrapped in a RResultPtr.
2325 ///
2326 /// ### Example usage:
2327 /// ~~~{.cpp}
2328 /// // Deduce column types (this invocation needs jitting internally)
2329 /// auto stats0 = myDf.Stats("values", "weights");
2330 /// // Explicit column types
2331 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
2332 /// ~~~
2333 ///
2334 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2336 {
2337 ColumnNames_t columns{std::string(value), std::string(weight)};
2338 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2339 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2340 const auto validColumnNames = GetValidatedColumnNames(2, columns);
2341 // We have 3 cases:
2342 // 1. Both types are inferred: we use Fill and let the jit kick in.
2343 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
2344 // 3. Both types are explicit: we invoke the fully compiled Fill method.
2345 if (vIsInferred && wIsInferred) {
2346 return Fill(TStatistic(), validColumnNames);
2347 } else if (vIsInferred != wIsInferred) {
2348 std::string error("The ");
2349 error += vIsInferred ? "value " : "weight ";
2350 error += "column type is explicit, while the ";
2351 error += vIsInferred ? "weight " : "value ";
2352 error += " is specified to be inferred. This case is not supported: please specify both types or none.";
2353 throw std::runtime_error(error);
2354 } else {
2355 return Fill<V, W>(TStatistic(), validColumnNames);
2356 }
2357 }
2358
2359 ////////////////////////////////////////////////////////////////////////////
2360 /// \brief Return the minimum of processed column values (*lazy action*).
2361 /// \tparam T The type of the branch/column.
2362 /// \param[in] columnName The name of the branch/column to be treated.
2363 /// \return the minimum value of the selected column wrapped in a RResultPtr.
2364 ///
2365 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2366 /// template specialization of this method.
2367 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2368 ///
2369 /// This action is *lazy*: upon invocation of this method the calculation is
2370 /// booked but not executed. Also see RResultPtr.
2371 ///
2372 /// ### Example usage:
2373 /// ~~~{.cpp}
2374 /// // Deduce column type (this invocation needs jitting internally)
2375 /// auto minVal0 = myDf.Min("values");
2376 /// // Explicit column type
2377 /// auto minVal1 = myDf.Min<double>("values");
2378 /// ~~~
2379 ///
2380 template <typename T = RDFDetail::RInferredType>
2382 {
2383 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2384 using RetType_t = RDFDetail::MinReturnType_t<T>;
2385 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
2386 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV);
2387 }
2388
2389 ////////////////////////////////////////////////////////////////////////////
2390 /// \brief Return the maximum of processed column values (*lazy action*).
2391 /// \tparam T The type of the branch/column.
2392 /// \param[in] columnName The name of the branch/column to be treated.
2393 /// \return the maximum value of the selected column wrapped in a RResultPtr.
2394 ///
2395 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2396 /// template specialization of this method.
2397 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2398 ///
2399 /// This action is *lazy*: upon invocation of this method the calculation is
2400 /// booked but not executed. Also see RResultPtr.
2401 ///
2402 /// ### Example usage:
2403 /// ~~~{.cpp}
2404 /// // Deduce column type (this invocation needs jitting internally)
2405 /// auto maxVal0 = myDf.Max("values");
2406 /// // Explicit column type
2407 /// auto maxVal1 = myDf.Max<double>("values");
2408 /// ~~~
2409 ///
2410 template <typename T = RDFDetail::RInferredType>
2412 {
2413 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2414 using RetType_t = RDFDetail::MaxReturnType_t<T>;
2415 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
2416 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV);
2417 }
2418
2419 ////////////////////////////////////////////////////////////////////////////
2420 /// \brief Return the mean of processed column values (*lazy action*).
2421 /// \tparam T The type of the branch/column.
2422 /// \param[in] columnName The name of the branch/column to be treated.
2423 /// \return the mean value of the selected column wrapped in a RResultPtr.
2424 ///
2425 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2426 /// template specialization of this method.
2427 ///
2428 /// This action is *lazy*: upon invocation of this method the calculation is
2429 /// booked but not executed. Also see RResultPtr.
2430 ///
2431 /// ### Example usage:
2432 /// ~~~{.cpp}
2433 /// // Deduce column type (this invocation needs jitting internally)
2434 /// auto meanVal0 = myDf.Mean("values");
2435 /// // Explicit column type
2436 /// auto meanVal1 = myDf.Mean<double>("values");
2437 /// ~~~
2438 ///
2439 template <typename T = RDFDetail::RInferredType>
2441 {
2442 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2443 auto meanV = std::make_shared<double>(0);
2444 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV);
2445 }
2446
2447 ////////////////////////////////////////////////////////////////////////////
2448 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
2449 /// \tparam T The type of the branch/column.
2450 /// \param[in] columnName The name of the branch/column to be treated.
2451 /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
2452 ///
2453 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2454 /// template specialization of this method.
2455 ///
2456 /// This action is *lazy*: upon invocation of this method the calculation is
2457 /// booked but not executed. Also see RResultPtr.
2458 ///
2459 /// ### Example usage:
2460 /// ~~~{.cpp}
2461 /// // Deduce column type (this invocation needs jitting internally)
2462 /// auto stdDev0 = myDf.StdDev("values");
2463 /// // Explicit column type
2464 /// auto stdDev1 = myDf.StdDev<double>("values");
2465 /// ~~~
2466 ///
2467 template <typename T = RDFDetail::RInferredType>
2469 {
2470 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2471 auto stdDeviationV = std::make_shared<double>(0);
2472 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV);
2473 }
2474
2475 // clang-format off
2476 ////////////////////////////////////////////////////////////////////////////
2477 /// \brief Return the sum of processed column values (*lazy action*).
2478 /// \tparam T The type of the branch/column.
2479 /// \param[in] columnName The name of the branch/column.
2480 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
2481 /// \return the sum of the selected column wrapped in a RResultPtr.
2482 ///
2483 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2484 /// template specialization of this method.
2485 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2486 ///
2487 /// This action is *lazy*: upon invocation of this method the calculation is
2488 /// booked but not executed. Also see RResultPtr.
2489 ///
2490 /// ### Example usage:
2491 /// ~~~{.cpp}
2492 /// // Deduce column type (this invocation needs jitting internally)
2493 /// auto sum0 = myDf.Sum("values");
2494 /// // Explicit column type
2495 /// auto sum1 = myDf.Sum<double>("values");
2496 /// ~~~
2497 ///
2498 template <typename T = RDFDetail::RInferredType>
2500 Sum(std::string_view columnName = "",
2501 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2502 {
2503 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2504 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
2505 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV);
2506 }
2507 // clang-format on
2508
2509 ////////////////////////////////////////////////////////////////////////////
2510 /// \brief Gather filtering statistics.
2511 /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2512 ///
2513 /// Calling `Report` on the main `RDataFrame` object gathers stats for
2514 /// all named filters in the call graph. Calling this method on a
2515 /// stored chain state (i.e. a graph node different from the first) gathers
2516 /// the stats for all named filters in the chain section between the original
2517 /// `RDataFrame` and that node (included). Stats are gathered in the same
2518 /// order as the named filters have been added to the graph.
2519 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
2520 /// effects cuts had.
2521 ///
2522 /// This action is *lazy*: upon invocation of
2523 /// this method the calculation is booked but not executed. See RResultPtr
2524 /// documentation.
2525 ///
2526 /// ### Example usage:
2527 /// ~~~{.cpp}
2528 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
2529 /// auto cutReport = filtered3.Report();
2530 /// cutReport->Print();
2531 /// ~~~
2532 ///
2534 {
2535 bool returnEmptyReport = false;
2536 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
2537 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
2538 // certainly does not contain named filters.
2539 // The number 4 takes into account the implicit columns for entry and slot number
2540 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
2542 returnEmptyReport = true;
2543
2544 auto rep = std::make_shared<RCutFlowReport>();
2545 using Helper_t = RDFInternal::ReportHelper<Proxied>;
2547
2548 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
2550
2551 fLoopManager->Book(action.get());
2552 return MakeResultPtr(rep, *fLoopManager, std::move(action));
2553 }
2554
2555 /////////////////////////////////////////////////////////////////////////////
2556 /// \brief Returns the names of the available columns.
2557 /// \return the container of column names.
2558 ///
2559 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2560 ///
2561 /// ### Example usage:
2562 /// ~~~{.cpp}
2563 /// auto colNames = d.GetColumnNames();
2564 /// // Print columns' names
2565 /// for (auto &&colName : colNames) std::cout << colName << std::endl;
2566 /// ~~~
2567 ///
2569 {
2570 // there could be duplicates between Redefined columns and columns in the data source
2571 std::unordered_set<std::string> allColumns;
2572
2573 auto addIfNotInternal = [&allColumns](std::string_view colName) {
2574 if (!RDFInternal::IsInternalColumn(colName))
2575 allColumns.emplace(colName);
2576 };
2577
2578 auto definedColumns = fColRegister.GetNames();
2579
2580 std::for_each(definedColumns.begin(), definedColumns.end(), addIfNotInternal);
2581
2582 auto tree = fLoopManager->GetTree();
2583 if (tree) {
2584 for (const auto &bName : RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false))
2585 allColumns.emplace(bName);
2586 }
2587
2588 if (fDataSource) {
2589 for (const auto &s : fDataSource->GetColumnNames()) {
2590 if (s.rfind("R_rdf_sizeof", 0) != 0)
2591 allColumns.emplace(s);
2592 }
2593 }
2594
2595 ColumnNames_t ret(allColumns.begin(), allColumns.end());
2596 std::sort(ret.begin(), ret.end());
2597 return ret;
2598 }
2599
2600 /////////////////////////////////////////////////////////////////////////////
2601 /// \brief Return the type of a given column as a string.
2602 /// \return the type of the required column.
2603 ///
2604 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2605 ///
2606 /// ### Example usage:
2607 /// ~~~{.cpp}
2608 /// auto colType = d.GetColumnType("columnName");
2609 /// // Print column type
2610 /// std::cout << "Column " << colType << " has type " << colType << std::endl;
2611 /// ~~~
2612 ///
2614 {
2615 const auto col = fColRegister.ResolveAlias(std::string(column));
2616
2617 RDFDetail::RDefineBase *define = fColRegister.HasName(col) ? fColRegister.GetColumns().at(col).get() : nullptr;
2618
2619 const bool convertVector2RVec = true;
2621 convertVector2RVec);
2622 }
2623
2624 /////////////////////////////////////////////////////////////////////////////
2625 /// \brief Return information about the dataframe.
2626 /// \return information about the dataframe as RDFDescription object
2627 ///
2628 /// This convenience function describes the dataframe and combines the following information:
2629 /// - Number of event loops run, see GetNRuns()
2630 /// - Number of total and defined columns, see GetColumnNames() and GetDefinedColumnNames()
2631 /// - Column names, see GetColumnNames()
2632 /// - Column types, see GetColumnType()
2633 /// - Number of processing slots, see GetNSlots()
2634 ///
2635 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2636 /// The result is dependent on the node from which this method is called, e.g. the list of
2637 /// defined columns returned by GetDefinedColumnNames().
2638 ///
2639 /// Please note that this is a convenience feature and the layout of the output can be subject
2640 /// to change and should be parsed via RDFDescription methods.
2641 ///
2642 /// ### Example usage:
2643 /// ~~~{.cpp}
2644 /// RDataFrame df(10);
2645 /// auto df2 = df.Define("x", "1.f").Define("s", "\"myStr\"");
2646 /// // Describe the dataframe
2647 /// df2.Describe().Print()
2648 /// df2.Describe().Print(/*shortFormat=*/true)
2649 /// std::cout << df2.Describe().AsString() << std::endl;
2650 /// std::cout << df2.Describe().AsString(/*shortFormat=*/true) << std::endl;
2651 /// ~~~
2652 ///
2654 {
2655 // Build set of defined column names to find later in all column names
2656 // the defined columns more efficiently
2657 const auto columnNames = GetColumnNames();
2658 std::set<std::string> definedColumnNamesSet;
2659 for (const auto &name : GetDefinedColumnNames())
2660 definedColumnNamesSet.insert(name);
2661
2662 // Get information for the metadata table
2663 const std::vector<std::string> metadataProperties = {"Columns in total", "Columns from defines",
2664 "Event loops run", "Processing slots"};
2665 const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
2666 std::to_string(definedColumnNamesSet.size()),
2667 std::to_string(GetNRuns()), std::to_string(GetNSlots())};
2668
2669 // Set header for metadata table
2670 const auto columnWidthProperties = RDFInternal::GetColumnWidth(metadataProperties);
2671 // The column width of the values is required to make right-bound numbers and is equal
2672 // to the maximum of the string "Value" and all values to be put in this column.
2673 const auto columnWidthValues =
2674 std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(), static_cast<std::size_t>(5u));
2675 std::stringstream ss;
2676 ss << std::left << std::setw(columnWidthProperties) << "Property" << std::setw(columnWidthValues) << "Value\n"
2677 << std::setw(columnWidthProperties) << "--------" << std::setw(columnWidthValues) << "-----\n";
2678
2679 // Build metadata table
2680 // All numbers should be bound to the right and strings bound to the left.
2681 for (auto i = 0u; i < metadataProperties.size(); i++) {
2682 ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
2683 << std::setw(columnWidthValues) << metadataValues[i] << '\n';
2684 }
2685 ss << '\n'; // put space between this and the next table
2686
2687 // Set header for columns table
2688 const auto columnWidthNames = RDFInternal::GetColumnWidth(columnNames);
2689 const auto columnTypes = GetColumnTypeNamesList(columnNames);
2690 const auto columnWidthTypes = RDFInternal::GetColumnWidth(columnTypes);
2691 ss << std::left << std::setw(columnWidthNames) << "Column" << std::setw(columnWidthTypes) << "Type"
2692 << "Origin\n"
2693 << std::setw(columnWidthNames) << "------" << std::setw(columnWidthTypes) << "----"
2694 << "------\n";
2695
2696 // Build columns table
2697 const auto nCols = columnNames.size();
2698 for (auto i = 0u; i < nCols; i++) {
2699 auto origin = "Dataset";
2700 if (definedColumnNamesSet.find(columnNames[i]) != definedColumnNamesSet.end())
2701 origin = "Define";
2702 ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes)
2703 << columnTypes[i] << origin;
2704 if (i < nCols - 1)
2705 ss << '\n';
2706 }
2707 // Use the string returned from DescribeDataset() as the 'brief' description
2708 // Use the converted to string stringstream ss as the 'full' description
2709 return RDFDescription(DescribeDataset(), ss.str());
2710 }
2711
2712 /// \brief Returns the names of the filters created.
2713 /// \return the container of filters names.
2714 ///
2715 /// If called on a root node, all the filters in the computation graph will
2716 /// be printed. For any other node, only the filters upstream of that node.
2717 /// Filters without a name are printed as "Unnamed Filter"
2718 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2719 ///
2720 /// ### Example usage:
2721 /// ~~~{.cpp}
2722 /// auto filtNames = d.GetFilterNames();
2723 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
2724 /// ~~~
2725 ///
2726 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
2727
2728 /// \brief Returns the names of the defined columns.
2729 /// \return the container of the defined column names.
2730 ///
2731 /// This is not an action nor a transformation, just a simple utility to
2732 /// get the columns names that have been defined up to the node.
2733 /// If no column has been defined, e.g. on a root node, it returns an
2734 /// empty collection.
2735 ///
2736 /// ### Example usage:
2737 /// ~~~{.cpp}
2738 /// auto defColNames = d.GetDefinedColumnNames();
2739 /// // Print defined columns' names
2740 /// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
2741 /// ~~~
2742 ///
2744 {
2745 ColumnNames_t definedColumns;
2746
2747 auto columns = fColRegister.GetColumns();
2748
2749 for (const auto &column : columns) {
2750 if (!RDFInternal::IsInternalColumn(column.first))
2751 definedColumns.emplace_back(column.first);
2752 }
2753
2754 return definedColumns;
2755 }
2756
2757 /// \brief Return a descriptor for the systematic variations registered in this branch of the computation graph.
2758 ///
2759 /// This is not an action nor a transformation, just a simple utility to
2760 /// inspect the systematic variations that have been registered with Vary() up to this node.
2761 /// When called on the root node, it returns an empty descriptor.
2762 ///
2763 /// ### Example usage:
2764 /// ~~~{.cpp}
2765 /// auto variations = d.GetVariations();
2766 /// variations.Print();
2767 /// ~~~
2768 ///
2770 {
2771 return {fColRegister.GetVariations()};
2772 }
2773
2774 /// \brief Checks if a column is present in the dataset.
2775 /// \return true if the column is available, false otherwise
2776 ///
2777 /// This method checks if a column is part of the input ROOT dataset, has
2778 /// been defined or can be provided by the data source.
2779 ///
2780 /// Example usage:
2781 /// ~~~{.cpp}
2782 /// ROOT::RDataFrame base(1);
2783 /// auto rdf = base.Define("definedColumn", [](){return 0;});
2784 /// rdf.HasColumn("definedColumn"); // true: we defined it
2785 /// rdf.HasColumn("rdfentry_"); // true: it's always there
2786 /// rdf.HasColumn("foo"); // false: it is not there
2787 /// ~~~
2789 {
2790 if (fColRegister.HasName(columnName))
2791 return true;
2792
2793 if (auto tree = fLoopManager->GetTree()) {
2794 const auto &branchNames = fLoopManager->GetBranchNames();
2795 const auto branchNamesEnd = branchNames.end();
2796 if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
2797 return true;
2798 }
2799
2800 if (fDataSource && fDataSource->HasColumn(columnName))
2801 return true;
2802
2803 return false;
2804 }
2805
2806 /// \brief Gets the number of data processing slots.
2807 /// \return The number of data processing slots used by this RDataFrame instance
2808 ///
2809 /// This method returns the number of data processing slots used by this RDataFrame
2810 /// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
2811 ///
2812 /// Example usage:
2813 /// ~~~{.cpp}
2814 /// ROOT::EnableImplicitMT(6)
2815 /// ROOT::RDataFrame df(1);
2816 /// std::cout << df.GetNSlots() << std::endl; // prints "6"
2817 /// ~~~
2818 unsigned int GetNSlots() const { return fLoopManager->GetNSlots(); }
2819
2820 /// \brief Gets the number of event loops run.
2821 /// \return The number of event loops run by this RDataFrame instance
2822 ///
2823 /// This method returns the number of events loops run so far by this RDataFrame instance.
2824 ///
2825 /// Example usage:
2826 /// ~~~{.cpp}
2827 /// ROOT::RDataFrame df(1);
2828 /// std::cout << df.GetNRuns() << std::endl; // prints "0"
2829 /// df.Sum("rdfentry_").GetValue(); // trigger the event loop
2830 /// std::cout << df.GetNRuns() << std::endl; // prints "1"
2831 /// df.Sum("rdfentry_").GetValue(); // trigger another event loop
2832 /// std::cout << df.GetNRuns() << std::endl; // prints "2"
2833 /// ~~~
2834 unsigned int GetNRuns() const { return fLoopManager->GetNRuns(); }
2835
2836 // clang-format off
2837 ////////////////////////////////////////////////////////////////////////////
2838 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2839 /// \tparam F The type of the aggregator callable. Automatically deduced.
2840 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2841 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2842 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2843 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2844 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2845 /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted)
2846 /// \return the result of the aggregation wrapped in a RResultPtr.
2847 ///
2848 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2849 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2850 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2851 /// the value of the column columnName.
2852 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2853 /// Otherwise the signature of aggregator must be `void(U&,T)`.
2854 ///
2855 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2856 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2857 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2858 ///
2859 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2860 ///
2861 /// Example usage:
2862 /// ~~~{.cpp}
2863 /// auto aggregator = [](double acc, double x) { return acc * x; };
2864 /// ROOT::EnableImplicitMT();
2865 /// // If multithread is enabled, the aggregator function will be called by more threads
2866 /// // and will produce a vector of partial accumulators.
2867 /// // The merger function performs the final aggregation of these partial results.
2868 /// auto merger = [](std::vector<double> &accumulators) {
2869 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2870 /// accumulators[0] *= accumulators[i];
2871 /// }
2872 /// };
2873 ///
2874 /// // The accumulator is initialized at this value by every thread.
2875 /// double initValue = 1.;
2876 ///
2877 /// // Multiplies all elements of the column "x"
2878 /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2879 /// ~~~
2880 // clang-format on
2881 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2882 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2883 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2884 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2885 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2886 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2887 {
2888 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2889 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2890
2891 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2892 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
2893
2894 auto accObjPtr = std::make_shared<U>(aggIdentity);
2895 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2897 auto action = std::make_unique<Action_t>(
2898 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2900 fLoopManager->Book(action.get());
2901 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2902 }
2903
2904 // clang-format off
2905 ////////////////////////////////////////////////////////////////////////////
2906 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2907 /// \tparam F The type of the aggregator callable. Automatically deduced.
2908 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2909 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2910 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U,T)`, where T is the type of the column, U is the type of the aggregator variable
2911 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2912 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2913 /// \return the result of the aggregation wrapped in a RResultPtr.
2914 ///
2915 /// See previous Aggregate overload for more information.
2916 // clang-format on
2917 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2918 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2919 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2920 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2921 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2922 {
2923 static_assert(
2925 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2926 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2927 }
2928
2929 // clang-format off
2930 ////////////////////////////////////////////////////////////////////////////
2931 /// \brief Book execution of a custom action using a user-defined helper object.
2932 /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present.
2933 /// \tparam OtherColumns A list of the types of the other columns used by this action
2934 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2935 /// \param[in] helper The Action Helper to be scheduled.
2936 /// \param[in] columns The names of the columns on which the helper acts.
2937 /// \return the result of the helper wrapped in a RResultPtr.
2938 ///
2939 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2940 /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2941 /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2942 ///
2943 /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2944 /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2945 /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2946 /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2947 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2948 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2949 /// the requested columns for the particular entry being processed.
2950 /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2951 /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2952 /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2953 /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations.
2954 /// It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper,
2955 /// or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
2956 /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2957 /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2958 /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2959 /// method concurrently, but will always pass different 'slot' numbers.
2960 /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2961 /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
2962 /// before Initialize(), because the RResultPtr is constructed before the event loop is started.
2963 /// * ROOT::RDF::SampleCallback_t GetSampleCallback(): optional. If present, it must return a callable with the
2964 /// appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing
2965 /// of every sample, as per with DefinePerSample().
2966 ///
2967 /// In case this is called without specifying column types, jitting is used,
2968 /// and the Helper class needs to be known to the interpreter.<br>
2969 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2970 ///
2971 /// ### Examples
2972 /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper.<br>
2973 /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx.
2974 ///
2975 // clang-format on
2976
2977 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
2979 {
2980 using HelperT = std::decay_t<Helper>;
2981 // TODO add more static sanity checks on Helper
2984 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2985
2986 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper));
2987 auto resPtr = hPtr->GetResultPtr();
2988
2990 return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
2991 } else {
2992 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
2993 columns.size());
2994 }
2995 }
2996
2997 ////////////////////////////////////////////////////////////////////////////
2998 /// \brief Provides a representation of the columns in the dataset.
2999 /// \tparam ColumnTypes variadic list of branch/column types.
3000 /// \param[in] columnList Names of the columns to be displayed.
3001 /// \param[in] nRows Number of events for each column to be displayed.
3002 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3003 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3004 ///
3005 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
3006 /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will
3007 /// return a complete version through `RDisplay::AsString()`.
3008 ///
3009 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
3010 /// RResultPtr.
3011 ///
3012 /// Example usage:
3013 /// ~~~{.cpp}
3014 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
3015 /// auto d1 = rdf.Display("");
3016 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
3017 /// auto d2 = d.Display({"x", "y"}, 128);
3018 /// // Printing the short representations, the event loop will run
3019 /// d1->Print();
3020 /// d2->Print();
3021 /// ~~~
3022 template <typename... ColumnTypes>
3023 RResultPtr<RDisplay>
3024 Display(const ColumnNames_t &columnList, int nRows = 5, size_t nMaxCollectionElements = 10)
3025 {
3026 CheckIMTDisabled("Display");
3027 auto newCols = columnList;
3028 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3029 auto displayer = std::make_shared<RDFInternal::RDisplay>(newCols, GetColumnTypeNamesList(newCols), nRows, nMaxCollectionElements);
3030 // Need to add ULong64_t type corresponding to the first column rdfentry_
3031 return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>(newCols, displayer, displayer);
3032 }
3033
3034 ////////////////////////////////////////////////////////////////////////////
3035 /// \brief Provides a representation of the columns in the dataset.
3036 /// \param[in] columnList Names of the columns to be displayed.
3037 /// \param[in] nRows Number of events for each column to be displayed.
3038 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3039 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3040 ///
3041 /// This overload automatically infers the column types.
3042 /// See the previous overloads for further details.
3043 ///
3044 /// Invoked when no types are specified to Display
3046 Display(const ColumnNames_t &columnList, int nRows = 5, size_t nMaxCollectionElements = 10)
3047 {
3048 CheckIMTDisabled("Display");
3049 auto newCols = columnList;
3050 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3051 auto displayer = std::make_shared<RDFInternal::RDisplay>(newCols, GetColumnTypeNamesList(newCols), nRows, nMaxCollectionElements);
3052 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(newCols, displayer, displayer,
3053 newCols.size());
3054 }
3055
3056 ////////////////////////////////////////////////////////////////////////////
3057 /// \brief Provides a representation of the columns in the dataset.
3058 /// \param[in] columnNameRegexp A regular expression to select the columns.
3059 /// \param[in] nRows Number of events for each column to be displayed.
3060 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3061 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3062 ///
3063 /// The existing columns are matched against the regular expression. If the string provided
3064 /// is empty, all columns are selected.
3065 /// See the previous overloads for further details.
3067 Display(std::string_view columnNameRegexp = "", int nRows = 5, size_t nMaxCollectionElements = 10)
3068 {
3069 const auto columnNames = GetColumnNames();
3070 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
3071 return Display(selectedColumns, nRows, nMaxCollectionElements);
3072 }
3073
3074 ////////////////////////////////////////////////////////////////////////////
3075 /// \brief Provides a representation of the columns in the dataset.
3076 /// \param[in] columnList Names of the columns to be displayed.
3077 /// \param[in] nRows Number of events for each column to be displayed.
3078 /// \param[in] nMaxCollectionElements Number of maximum elements in collection.
3079 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3080 ///
3081 /// See the previous overloads for further details.
3082 RResultPtr<RDisplay> Display(std::initializer_list<std::string> columnList, int nRows = 5,
3083 size_t nMaxCollectionElements = 10)
3084 {
3085 ColumnNames_t selectedColumns(columnList);
3086 return Display(selectedColumns, nRows, nMaxCollectionElements);
3087 }
3088
3089private:
3091 {
3092 // Entry number column
3093 const std::string entryColName = "rdfentry_";
3094 const std::string entryColType = "ULong64_t";
3095 auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
3096 using NewColEntry_t = RDFDetail::RDefine<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
3097
3098 auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
3100 fColRegister.AddColumn(entryColumn);
3101
3102 // Slot number column
3103 const std::string slotColName = "rdfslot_";
3104 const std::string slotColType = "unsigned int";
3105 auto slotColGen = [](unsigned int slot) { return slot; };
3106 using NewColSlot_t = RDFDetail::RDefine<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
3107
3108 auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), ColumnNames_t{},
3110 fColRegister.AddColumn(slotColumn);
3111
3112 fColRegister.AddAlias("tdfentry_", entryColName);
3113 fColRegister.AddAlias("tdfslot_", slotColName);
3114 }
3115
3116 std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
3117 {
3118 std::vector<std::string> types;
3119
3120 for (auto column : columnList) {
3121 types.push_back(GetColumnType(column));
3122 }
3123 return types;
3124 }
3125
3127 {
3129 std::string error(callerName);
3130 error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
3131 throw std::runtime_error(error);
3132 }
3133 }
3134
3135 /// Create RAction object, return RResultPtr for the action
3136 /// Overload for the case in which all column types were specified (no jitting).
3137 /// For most actions, `r` and `helperArg` will refer to the same object, because the only argument to forward to
3138 /// the action helper is the result value itself. We need the distinction for actions such as Snapshot or Cache,
3139 /// for which the constructor arguments of the action helper are different from the returned value.
3140 template <typename ActionTag, typename... ColTypes, typename ActionResultType,
3141 typename HelperArgType = ActionResultType,
3142 std::enable_if_t<!RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
3144 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
3145 const std::shared_ptr<HelperArgType> &helperArg, const int /*nColumns*/ = -1)
3146 {
3147 constexpr auto nColumns = sizeof...(ColTypes);
3148
3149 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
3151
3152 const auto nSlots = fLoopManager->GetNSlots();
3153
3154 auto action = RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots, fProxiedPtr, ActionTag{},
3155 fColRegister);
3156 fLoopManager->Book(action.get());
3157 fLoopManager->AddSampleCallback(action->GetSampleCallback());
3158 return MakeResultPtr(r, *fLoopManager, std::move(action));
3159 }
3160
3161 /// Create RAction object, return RResultPtr for the action
3162 /// Overload for the case in which one or more column types were not specified (RTTI + jitting).
3163 /// This overload has a `nColumns` optional argument. If present, the number of required columns for
3164 /// this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(ColTypes).
3165 template <typename ActionTag, typename... ColTypes, typename ActionResultType,
3166 typename HelperArgType = ActionResultType,
3167 std::enable_if_t<RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
3168 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
3169 const std::shared_ptr<HelperArgType> &helperArg, const int nColumns = -1)
3170 {
3171 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(ColTypes));
3172
3173 const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
3174 const unsigned int nSlots = fLoopManager->GetNSlots();
3175
3176 auto *tree = fLoopManager->GetTree();
3177 auto *helperArgOnHeap = RDFInternal::MakeSharedOnHeap(helperArg);
3178
3179 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
3180 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
3181 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister, fDataSource);
3182
3183 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(
3184 *fLoopManager, validColumnNames, fColRegister, fProxiedPtr->GetVariations());
3185 auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
3186
3187 auto toJit =
3188 RDFInternal::JitBuildAction(validColumnNames, upcastNodeOnHeap, typeid(HelperArgType), typeid(ActionTag),
3189 helperArgOnHeap, tree, nSlots, fColRegister, fDataSource, jittedActionOnHeap);
3190 fLoopManager->Book(jittedAction.get());
3191 fLoopManager->ToJitExec(toJit);
3192 return MakeResultPtr(r, *fLoopManager, std::move(jittedAction));
3193 }
3194
3195 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
3197 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
3198 {
3200 if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
3203 } else {
3207 }
3208
3209 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
3210 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
3212 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
3214
3215 constexpr auto nColumns = ColTypes_t::list_size;
3216
3217 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
3218 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
3219
3220 // Declare return type to the interpreter, for future use by jitted actions
3221 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3222 if (retTypeName.empty()) {
3223 // The type is not known to the interpreter.
3224 // We must not error out here, but if/when this column is used in jitted code
3225 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3226 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3227 }
3228
3229 using NewCol_t = RDFDetail::RDefine<F, DefineType>;
3230 auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
3232 fLoopManager->Book(newColumn.get());
3233
3235 newCols.AddColumn(newColumn);
3236
3237 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
3238
3239 return newInterface;
3240 }
3241
3242 // This overload is chosen when the callable passed to Define or DefineSlot returns void.
3243 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
3244 // this way compilation of `Define` has no way to continue after throwing the error.
3245 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
3246 bool IsFStringConv = std::is_convertible<F, std::string>::value,
3247 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
3248 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
3250 {
3251 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
3252 "Error in `Define`: type returned by expression is not default-constructible");
3253 return *this; // never reached
3254 }
3255
3256 template <typename... ColumnTypes>
3258 const ColumnNames_t &columnList, const RSnapshotOptions &options)
3259 {
3260 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3261
3262 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size());
3263 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
3266
3267 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
3268 const auto &treename = parsedTreePath.fTreeName;
3269 const auto &dirname = parsedTreePath.fDirName;
3270
3271 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
3272 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
3273
3275 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, validCols);
3276
3277 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs);
3278
3279 if (!options.fLazy)
3280 *resPtr;
3281 return resPtr;
3282 }
3283
3284 ////////////////////////////////////////////////////////////////////////////
3285 /// \brief Implementation of cache.
3286 template <typename... ColTypes, std::size_t... S>
3287 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
3288 {
3289 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3290
3291 // Check at compile time that the columns types are copy constructible
3292 constexpr bool areCopyConstructible =
3294 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
3295
3296 RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
3297
3298 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
3299 auto ds = std::make_unique<RLazyDS<ColTypes...>>(
3300 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
3301
3302 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
3303
3304 return cachedRDF;
3305 }
3306
3307 template <typename Helper, typename ActionResultType>
3308 auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
3309 const std::shared_ptr<Helper> &hPtr,
3311 -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
3312 {
3313 return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, 0u);
3314 }
3315
3316 template <typename Helper, typename ActionResultType, typename... Others>
3317 RResultPtr<ActionResultType>
3318 CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
3319 const std::shared_ptr<Helper>& /*hPtr*/,
3320 Others...)
3321 {
3322 throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
3323 "columns! The action helper type was ") +
3324 typeid(Helper).name());
3325 return {};
3326 }
3327
3328 template <typename RetType>
3329 void SanityChecksForVary(const std::vector<std::string> &colNames, const std::vector<std::string> &variationTags,
3330 std::string_view variationName)
3331 {
3332 R__ASSERT(variationTags.size() > 0 && "Must have at least one variation.");
3333 R__ASSERT(colNames.size() > 0 && "Must have at least one varied column.");
3334 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
3335
3336 for (auto &colName : colNames) {
3337 RDFInternal::CheckValidCppVarName(colName, "Vary");
3340 }
3341 RDFInternal::CheckValidCppVarName(variationName, "Vary");
3342
3343 static_assert(RDFInternal::IsRVec<RetType>::value, "Vary expressions must return an RVec.");
3344
3345 if (colNames.size() > 1) { // we are varying multiple columns simultaneously, RetType is RVec<RVec<T>>
3347 if (!hasInnerRVec)
3348 throw std::runtime_error("This Vary call is varying multiple columns simultaneously but the expression "
3349 "does not return an RVec of RVecs.");
3350
3351 auto colTypes = GetColumnTypeNamesList(colNames);
3352 auto allColTypesEqual =
3353 std::all_of(colTypes.begin() + 1, colTypes.end(), [&](const std::string &t) { return t == colTypes[0]; });
3354 if (!allColTypesEqual)
3355 throw std::runtime_error("Cannot simultaneously vary multiple columns of different types.");
3356
3357 const auto &innerTypeID = typeid(RDFInternal::InnerValueType_t<RetType>);
3358
3359 const auto &definesMap = fColRegister.GetColumns();
3360 for (auto i = 0u; i < colTypes.size(); ++i) {
3361 const auto it = definesMap.find(colNames[i]);
3362 const auto &expectedTypeID =
3363 it == definesMap.end() ? RDFInternal::TypeName2TypeID(colTypes[i]) : it->second->GetTypeId();
3364 if (innerTypeID != expectedTypeID)
3365 throw std::runtime_error("Varied values for column \"" + colNames[i] + "\" have a different type (" +
3366 RDFInternal::TypeID2TypeName(innerTypeID) + ") than the nominal value (" +
3367 colTypes[i] + ").");
3368 }
3369 } else { // we are varying a single column, RetType is RVec<T>
3370 const auto &retTypeID = typeid(typename RetType::value_type);
3371 const auto &colName = colNames[0]; // we have only one element in there
3372 const auto &definesMap = fColRegister.GetColumns();
3373 const auto it = definesMap.find(colName);
3374 const auto &expectedTypeID =
3375 it == definesMap.end() ? RDFInternal::TypeName2TypeID(GetColumnType(colName)) : it->second->GetTypeId();
3376 if (retTypeID != expectedTypeID)
3377 throw std::runtime_error("Varied values for column \"" + colName + "\" have a different type (" +
3378 RDFInternal::TypeID2TypeName(retTypeID) + ") than the nominal value (" +
3379 GetColumnType(colName) + ").");
3380 }
3381
3382 // when varying multiple columns, they must be different columns
3383 if (colNames.size() > 1) {
3384 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
3385 if (uniqueCols.size() != colNames.size())
3386 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
3387 }
3388 }
3389
3390protected:
3391 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &columns,
3392 RDataSource *ds)
3393 : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fColRegister(columns)
3394 {
3395 }
3396
3398
3399 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
3400
3401 ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
3402 {
3404 }
3405
3406 template <typename... ColumnTypes>
3408 {
3409 if (fDataSource != nullptr)
3410 RDFInternal::AddDSColumns(validCols, *fLoopManager, *fDataSource, typeList, fColRegister);
3411 }
3412};
3413
3414} // namespace RDF
3415
3416} // namespace ROOT
3417
3418#endif // ROOT_RDF_INTERFACE
#define f(i)
Definition: RSha256.hxx:104
#define h(i)
Definition: RSha256.hxx:106
unsigned int UInt_t
Definition: RtypesCore.h:46
unsigned long long ULong64_t
Definition: RtypesCore.h:81
#define R__ASSERT(e)
Definition: TError.h:118
constexpr Int_t kError
Definition: TError.h:46
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition: TGX11.cxx:110
Base class for action helpers, see RInterface::Book() for more information.
Definition: RActionImpl.hxx:26
The head node of a RDF computation graph.
ULong64_t GetNEmptyEntries() const
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
void AddSampleCallback(ROOT::RDF::SampleCallback_t &&callback)
unsigned int GetNRuns() const
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
RDataSource * GetDataSource() const
unsigned int GetNSlots() const
void Book(RDFInternal::RActionBase *actionPtr)
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
Helper class that provides the operation graph nodes.
A RDataFrame node that produces a result.
Definition: RAction.hxx:54
A binder for user-defined columns and aliases.
void AddVariation(const std::shared_ptr< RVariationBase > &variation)
Register a new systematic variation.
const DefinesMap_t & GetColumns() const
Returns a map of pointers to the defined columns.
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
std::string ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
void AddColumn(const std::shared_ptr< RDFDetail::RDefineBase > &column)
Add a new booked column.
const VariationsMap_t & GetVariations() const
Returns the multimap of systematic variations, see fVariations.
ColumnNames_t GetNames() const
Returns the list of the names of the defined columns (Defines + Aliases).
void AddAlias(std::string_view alias, std::string_view colName)
Add a new alias to the ledger.
A DFDescription contains useful information about a given RDataFrame computation graph.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual bool HasColumn(std::string_view colName) const =0
Checks if the dataset has a certain column.
virtual std::string GetLabel()
Return a string representation of the datasource type.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
The public interface to the RDataFrame federation of classes.
Definition: RInterface.hxx:104
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RVariationsDescription GetVariations()
Return a descriptor for the systematic variations registered in this branch of the computation graph.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action).
RLoopManager * GetLoopManager() const
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing columns using auto-generated variation tags.
Definition: RInterface.hxx:748
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing column.
Definition: RInterface.hxx:869
std::string DescribeDataset() const
Definition: RInterface.hxx:126
RResultPtr<::TGraph > Graph(std::string_view x="", std::string_view y="")
Fill and return a TGraph object (lazy action).
RResultPtr< ActionResultType > CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &, const std::shared_ptr< Helper > &, Others...)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot.
Definition: RInterface.hxx:421
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action).
std::enable_if_t< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > > DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
Definition: RInterface.hxx:612
RInterface & operator=(RInterface &&)=default
Move-assignment operator for RInterface.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns using auto-generated tags.
Definition: RInterface.hxx:819
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action).
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int nColumns=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which one or more co...
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column.
Definition: RInterface.hxx:851
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.
Definition: RInterface.hxx:392
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Redefine(std::string_view name, std::string_view expression)
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:560
auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &resPtr, const std::shared_ptr< Helper > &hPtr, TTraits::TypeList< RDFDetail::RInferredType >) -> decltype(hPtr->Exec(0u), RResultPtr< ActionResultType >{})
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
Definition: RInterface.hxx:121
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns.
Definition: RInterface.hxx:895
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view fullTreeName, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which all column typ...
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< RLoopManager > &proxied)
Build a RInterface from a RLoopManager.
Definition: RInterface.hxx:226
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
Definition: RInterface.hxx:327
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, std::string_view expression)
Define a new column that is updated when the input sample changes.
Definition: RInterface.hxx:678
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow to refer to a column with a different name.
Definition: RInterface.hxx:970
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Redefine(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:502
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
RResultPtr< typename std::decay_t< Helper >::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RLoopManager * fLoopManager
Definition: RInterface.hxx:119
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
Definition: RInterface.hxx:110
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action).
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
Definition: RInterface.hxx:124
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column.
Definition: RInterface.hxx:734
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for one or more existing columns.
Definition: RInterface.hxx:923
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Define a new column.
Definition: RInterface.hxx:469
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
Definition: RInterface.hxx:117
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action).
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
Register a systematic variation that affects multiple columns simultaneously.
Definition: RInterface.hxx:779
RInterface< Proxied, DS_t > RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:540
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... >)
Implementation of cache.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action).
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action).
RResultPtr< std::decay_t< T > > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action).
std::enable_if_t<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > > DefineImpl(std::string_view, F, const ColumnNames_t &)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
void SanityChecksForVary(const std::vector< std::string > &colNames, const std::vector< std::string > &variationTags, std::string_view variationName)
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RInterface< Proxied, DS_t > RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:521
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action).
RResultPtr<::TGraphAsymmErrors > GraphAsymmErrors(std::string_view x="", std::string_view y="", std::string_view exl="", std::string_view exh="", std::string_view eyl="", std::string_view eyh="")
Fill and return a TGraphAsymmErrors object (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot and the current entry.
Definition: RInterface.hxx:451
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action).
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:347
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
Fill and return a two-dimensional profile (lazy action).
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:287
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNRuns() const
Gets the number of event loops run.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action).
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
Definition: RInterface.hxx:311
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &columns, RDataSource *ds)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RDFDescription Describe()
Return information about the dataframe.
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action).
A RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:103
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
Definition: RSampleInfo.hxx:32
A descriptor for the systematic variations known to a given RDataFrame node.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Definition: RDataFrame.hxx:40
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
TDirectory::TContext keeps track and restore the current directory.
Definition: TDirectory.h:89
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Statistical variable, defined by its mean and variance (RMS).
Definition: TStatistic.h:33
RFriendInfo GetFriendInfo(const TTree &tree)
Get and store the names, aliases and file names of the direct friends of the tree.
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
Get and store the file names associated with the input tree.
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
#define F(x, y, z)
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create a RResultPtr and set its pointer to the corresponding RAction This overload is invoked by non-...
Definition: RResultPtr.hxx:430
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Vary call.
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition: RDFUtils.cxx:51
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
void CheckValidCppVarName(std::string_view var, const std::string &where)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:224
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:99
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &customColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
Definition: RDFUtils.cxx:374
std::string PrettyPrintAddr(const void *const addr)
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
ColumnNames_t GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition: RDFUtils.cxx:365
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &customCols, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols)
Throw if the column has systematic variations attached.
void TriggerRun(ROOT::RDF::RNode &node)
Trigger the execution of an RDataFrame computation graph.
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &customCols, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
double T(double x)
Definition: ChebyshevPol.h:34
std::vector< std::string > ColumnNames_t
Definition: Utils.hxx:35
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
ROOT type_traits extensions.
Definition: TypeTraits.hxx:21
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:527
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:558
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition: TROOT.cxx:544
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1195
RooArgSet S(Args_t &&... args)
Definition: RooArgSet.h:241
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
static constexpr double s
Definition: graph.py:1
Definition: tree.py:1
type is TypeList if MustRemove is false, otherwise it is a TypeList with the first type removed
Definition: Utils.hxx:139
A collection of options to steer the creation of the dataset on file.
bool fLazy
Do not start the event loop when Snapshot is called.
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:30
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores the parameters of a TH2D.
Definition: HistoModels.hxx:48
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores the parameters of a TH3D.
Definition: HistoModels.hxx:73
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores the parameters of a THnD.
std::shared_ptr<::THnD > GetHistogram() const
A struct which stores the parameters of a TProfile.
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores the parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:25