Logo ROOT  
Reference Guide
ActionHelpers.hxx
Go to the documentation of this file.
1/**
2 \file ROOT/RDF/ActionHelpers.hxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \date 2020-06
9*/
10
11/*************************************************************************
12 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
13 * All rights reserved. *
14 * *
15 * For the licensing terms see $ROOTSYS/LICENSE. *
16 * For the list of contributors see $ROOTSYS/README/CREDITS. *
17 *************************************************************************/
18
19#ifndef ROOT_RDFOPERATIONS
20#define ROOT_RDFOPERATIONS
21
22#include "Compression.h"
23#include "ROOT/RStringView.hxx"
24#include "ROOT/RVec.hxx"
25#include "ROOT/TBufferMerger.hxx" // for SnapshotHelper
28#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/TypeTraits.hxx"
31#include "ROOT/RDF/RDisplay.hxx"
32#include "RtypesCore.h"
33#include "TBranch.h"
34#include "TClassEdit.h"
35#include "TClassRef.h"
36#include "TDirectory.h"
37#include "TError.h" // for R__ASSERT, Warning
38#include "TFile.h" // for SnapshotHelper
39#include "TH1.h"
40#include "TGraph.h"
41#include "TGraphAsymmErrors.h"
42#include "TLeaf.h"
43#include "TObject.h"
44#include "TTree.h"
45#include "TTreeReader.h" // for SnapshotHelper
46#include "TStatistic.h"
49
50#include <algorithm>
51#include <functional>
52#include <limits>
53#include <memory>
54#include <stdexcept>
55#include <string>
56#include <type_traits>
57#include <utility> // std::index_sequence
58#include <vector>
59#include <iomanip>
60#include <numeric> // std::accumulate in MeanHelper
61
62/// \cond HIDDEN_SYMBOLS
63
64namespace ROOT {
65namespace Internal {
66namespace RDF {
67using namespace ROOT::TypeTraits;
68using namespace ROOT::VecOps;
69using namespace ROOT::RDF;
70using namespace ROOT::Detail::RDF;
71
72using Hist_t = ::TH1D;
73
74class RBranchSet {
75 std::vector<TBranch *> fBranches;
76 std::vector<std::string> fNames;
77
78public:
79 TBranch *Get(const std::string &name) const
80 {
81 auto it = std::find(fNames.begin(), fNames.end(), name);
82 if (it == fNames.end())
83 return nullptr;
84 return fBranches[std::distance(fNames.begin(), it)];
85 }
86
87 void Insert(const std::string &name, TBranch *address)
88 {
89 if (address == nullptr) {
90 throw std::logic_error("Trying to insert a null branch address.");
91 }
92 if (std::find(fBranches.begin(), fBranches.end(), address) != fBranches.end()) {
93 throw std::logic_error("Trying to insert a branch address that's already present.");
94 }
95 if (std::find(fNames.begin(), fNames.end(), name) != fNames.end()) {
96 throw std::logic_error("Trying to insert a branch name that's already present.");
97 }
98 fNames.emplace_back(name);
99 fBranches.emplace_back(address);
100 }
101
102 void Clear()
103 {
104 fBranches.clear();
105 fNames.clear();
106 }
107
108 void AssertNoNullBranchAddresses()
109 {
110 std::vector<TBranch *> branchesWithNullAddress;
111 std::copy_if(fBranches.begin(), fBranches.end(), std::back_inserter(branchesWithNullAddress),
112 [](TBranch *b) { return b->GetAddress() == nullptr; });
113
114 if (branchesWithNullAddress.empty())
115 return;
116
117 // otherwise build error message and throw
118 std::vector<std::string> missingBranchNames;
119 std::transform(branchesWithNullAddress.begin(), branchesWithNullAddress.end(),
120 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
121 std::string msg = "RDataFrame::Snapshot:";
122 if (missingBranchNames.size() == 1) {
123 msg += " branch " + missingBranchNames[0] +
124 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
125 "it is";
126 } else {
127 msg += " branches ";
128 for (const auto &bName : missingBranchNames)
129 msg += bName + ", ";
130 msg.resize(msg.size() - 2); // remove last ", "
131 msg +=
132 " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
133 }
134 msg += " not part of the set of branches that are being written out.";
135 throw std::runtime_error(msg);
136 }
137};
138
/// Container type used to hold one partial result per thread in an action helper.
// std::vector<bool> must never be instantiated here: its packed representation makes it impossible to hand out
// a reference to a single thread-local result, so bool results live in a std::deque instead.
// Having one shared alias also means the underlying container can be swapped in a single place, e.g. should
// false sharing between thread-local results turn out to be a problem.
template <typename T>
using Results = typename std::conditional<std::is_same<T, bool>::value, std::deque<T>, std::vector<T>>::type;
145
146template <typename F>
147class R__CLING_PTRCHECK(off) ForeachSlotHelper : public RActionImpl<ForeachSlotHelper<F>> {
148 F fCallable;
149
150public:
152 ForeachSlotHelper(F &&f) : fCallable(f) {}
153 ForeachSlotHelper(ForeachSlotHelper &&) = default;
154 ForeachSlotHelper(const ForeachSlotHelper &) = delete;
155
156 void InitTask(TTreeReader *, unsigned int) {}
157
158 template <typename... Args>
159 void Exec(unsigned int slot, Args &&... args)
160 {
161 // check that the decayed types of Args are the same as the branch types
162 static_assert(std::is_same<TypeList<std::decay_t<Args>...>, ColumnTypes_t>::value, "");
163 fCallable(slot, std::forward<Args>(args)...);
164 }
165
166 void Initialize() { /* noop */}
167
168 void Finalize() { /* noop */}
169
170 std::string GetActionName() { return "ForeachSlot"; }
171};
172
173class R__CLING_PTRCHECK(off) CountHelper : public RActionImpl<CountHelper> {
174 const std::shared_ptr<ULong64_t> fResultCount;
175 Results<ULong64_t> fCounts;
176
177public:
178 using ColumnTypes_t = TypeList<>;
179 CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots);
180 CountHelper(CountHelper &&) = default;
181 CountHelper(const CountHelper &) = delete;
182 void InitTask(TTreeReader *, unsigned int) {}
183 void Exec(unsigned int slot);
184 void Initialize() { /* noop */}
185 void Finalize();
186
187 // Helper functions for RMergeableValue
188 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
189 {
190 return std::make_unique<RMergeableCount>(*fResultCount);
191 }
192
193 ULong64_t &PartialUpdate(unsigned int slot);
194
195 std::string GetActionName() { return "Count"; }
196
197 CountHelper MakeNew(void *newResult)
198 {
199 auto &result = *static_cast<std::shared_ptr<ULong64_t> *>(newResult);
200 return CountHelper(result, fCounts.size());
201 }
202};
203
204template <typename ProxiedVal_t>
205class R__CLING_PTRCHECK(off) ReportHelper : public RActionImpl<ReportHelper<ProxiedVal_t>> {
206 const std::shared_ptr<RCutFlowReport> fReport;
207 // Here we have a weak pointer since we need to keep track of the validity
208 // of the proxied node. It can happen that the user does not trigger the
209 // event loop by looking into the RResultPtr and the chain goes out of scope
210 // before the Finalize method is invoked.
211 std::weak_ptr<ProxiedVal_t> fProxiedWPtr;
212 bool fReturnEmptyReport;
213
214public:
215 using ColumnTypes_t = TypeList<>;
216 ReportHelper(const std::shared_ptr<RCutFlowReport> &report, const std::shared_ptr<ProxiedVal_t> &pp, bool emptyRep)
217 : fReport(report), fProxiedWPtr(pp), fReturnEmptyReport(emptyRep){};
218 ReportHelper(ReportHelper &&) = default;
219 ReportHelper(const ReportHelper &) = delete;
220 void InitTask(TTreeReader *, unsigned int) {}
221 void Exec(unsigned int /* slot */) {}
222 void Initialize() { /* noop */}
223 void Finalize()
224 {
225 // We need the weak_ptr in order to avoid crashes at tear down
226 if (!fReturnEmptyReport && !fProxiedWPtr.expired())
227 fProxiedWPtr.lock()->Report(*fReport);
228 }
229
230 std::string GetActionName() { return "Report"; }
231
232 // TODO implement MakeNew. Requires some smartness in passing the appropriate previous node.
233};
234
235/// This helper fills TH1Ds for which no axes were specified by buffering the fill values to pick good axes limits.
236///
237/// TH1Ds have an automatic mechanism to pick good limits based on the first N entries they were filled with, but
238/// that does not work in multi-thread event loops as it might yield histograms with incompatible binning in each
239/// thread, making it impossible to merge the per-thread results.
240/// Instead, this helper delays the decision on the axes limits until all threads have done processing, synchronizing
241/// the decision on the limits as part of the merge operation.
242class R__CLING_PTRCHECK(off) BufferedFillHelper : public RActionImpl<BufferedFillHelper> {
243 // this sets a total initial size of 16 MB for the buffers (can increase)
244 static constexpr unsigned int fgTotalBufSize = 2097152;
245 using BufEl_t = double;
246 using Buf_t = std::vector<BufEl_t>;
247
248 std::vector<Buf_t> fBuffers;
249 std::vector<Buf_t> fWBuffers;
250 const std::shared_ptr<Hist_t> fResultHist;
251 unsigned int fNSlots;
252 unsigned int fBufSize;
253 /// Histograms containing "snapshots" of partial results. Non-null only if a registered callback requires it.
254 Results<std::unique_ptr<Hist_t>> fPartialHists;
255 Buf_t fMin;
256 Buf_t fMax;
257
258 void UpdateMinMax(unsigned int slot, double v);
259
260public:
261 BufferedFillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots);
262 BufferedFillHelper(BufferedFillHelper &&) = default;
263 BufferedFillHelper(const BufferedFillHelper &) = delete;
264 void InitTask(TTreeReader *, unsigned int) {}
265 void Exec(unsigned int slot, double v);
266 void Exec(unsigned int slot, double v, double w);
267
269 void Exec(unsigned int slot, const T &vs)
270 {
271 auto &thisBuf = fBuffers[slot];
272 // range-based for results in warnings on some compilers due to vector<bool>'s custom reference type
273 for (auto v = vs.begin(); v != vs.end(); ++v) {
274 UpdateMinMax(slot, *v);
275 thisBuf.emplace_back(*v); // TODO: Can be optimised in case T == BufEl_t
276 }
277 }
278
280 void Exec(unsigned int slot, const T &vs, const W &ws)
281 {
282 auto &thisBuf = fBuffers[slot];
283
284 for (auto &v : vs) {
285 UpdateMinMax(slot, v);
286 thisBuf.emplace_back(v);
287 }
288
289 auto &thisWBuf = fWBuffers[slot];
290 for (auto &w : ws) {
291 thisWBuf.emplace_back(w); // TODO: Can be optimised in case T == BufEl_t
292 }
293 }
294
296 void Exec(unsigned int slot, const T &vs, const W w)
297 {
298 auto &thisBuf = fBuffers[slot];
299 for (auto &v : vs) {
300 UpdateMinMax(slot, v);
301 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
302 }
303
304 auto &thisWBuf = fWBuffers[slot];
305 thisWBuf.insert(thisWBuf.end(), vs.size(), w);
306 }
307
308 // ROOT-10092: Filling with a scalar as first column and a collection as second is not supported
310 void Exec(unsigned int, const T &, const W &)
311 {
312 throw std::runtime_error(
313 "Cannot fill object if the type of the first column is a scalar and the one of the second a container.");
314 }
315
316 Hist_t &PartialUpdate(unsigned int);
317
318 void Initialize() { /* noop */}
319
320 void Finalize();
321
322 // Helper functions for RMergeableValue
323 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
324 {
325 return std::make_unique<RMergeableFill<Hist_t>>(*fResultHist);
326 }
327
328 std::string GetActionName()
329 {
330 return std::string(fResultHist->IsA()->GetName()) + "\\n" + std::string(fResultHist->GetName());
331 }
332
333 BufferedFillHelper MakeNew(void *newResult)
334 {
335 auto &result = *static_cast<std::shared_ptr<Hist_t> *>(newResult);
336 result->Reset();
337 result->SetDirectory(nullptr);
338 return BufferedFillHelper(result, fNSlots);
339 }
340};
341
// Explicit instantiation declarations of the container overloads of BufferedFillHelper::Exec for the most
// common element types: these specializations are not implicitly instantiated in translation units that
// include this header. One-container (values only) and two-container (values and weights) forms are covered.
extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<float> &);
extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<double> &);
extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<char> &);
extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<int> &);
extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<unsigned int> &);
extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<float> &, const std::vector<float> &);
extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<double> &, const std::vector<double> &);
extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<char> &, const std::vector<char> &);
extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<int> &, const std::vector<int> &);
extern template void
BufferedFillHelper::Exec(unsigned int, const std::vector<unsigned int> &, const std::vector<unsigned int> &);
353
354/// The generic Fill helper: it calls Fill on per-thread objects and then Merge to produce a final result.
355/// For one-dimensional histograms, if no axes are specified, RDataFrame uses BufferedFillHelper instead.
356template <typename HIST = Hist_t>
357class R__CLING_PTRCHECK(off) FillHelper : public RActionImpl<FillHelper<HIST>> {
358 std::vector<HIST *> fObjects;
359
360 template <typename H = HIST, typename = decltype(std::declval<H>().Reset())>
361 void ResetIfPossible(H *h)
362 {
363 h->Reset();
364 }
365
366 void ResetIfPossible(TStatistic *h) { *h = TStatistic(); }
367
368 // cannot safely re-initialize variations of the result, hence error out
369 void ResetIfPossible(...)
370 {
371 throw std::runtime_error(
372 "A systematic variation was requested for a custom Fill action, but the type of the object to be filled does "
373 "not implement a Reset method, so we cannot safely re-initialize variations of the result. Aborting.");
374 }
375
376 void UnsetDirectoryIfPossible(TH1 *h) {
377 h->SetDirectory(nullptr);
378 }
379
380 void UnsetDirectoryIfPossible(...) {}
381
382 // Merge overload for types with Merge(TCollection*), like TH1s
384 auto Merge(std::vector<H *> &objs, int /*toincreaseoverloadpriority*/)
385 -> decltype(objs[0]->Merge((TCollection *)nullptr), void())
386 {
387 TList l;
388 for (auto it = ++objs.begin(); it != objs.end(); ++it)
389 l.Add(*it);
390 objs[0]->Merge(&l);
391 }
392
393 // Merge overload for types with Merge(const std::vector&)
394 template <typename H>
395 auto Merge(std::vector<H *> &objs, double /*toloweroverloadpriority*/)
396 -> decltype(objs[0]->Merge(std::vector<HIST *>{}), void())
397 {
398 objs[0]->Merge({++objs.begin(), objs.end()});
399 }
400
401 // Merge overload to error out in case no valid HIST::Merge method was detected
402 template <typename T>
403 void Merge(T, ...)
404 {
405 static_assert(sizeof(T) < 0,
406 "The type passed to Fill does not provide a Merge(TCollection*) or Merge(const std::vector&) method.");
407 }
408
409 // class which wraps a pointer and implements a no-op increment operator
410 template <typename T>
411 class ScalarConstIterator {
412 const T *obj_;
413
414 public:
415 ScalarConstIterator(const T *obj) : obj_(obj) {}
416 const T &operator*() const { return *obj_; }
417 ScalarConstIterator<T> &operator++() { return *this; }
418 };
419
420 // helper functions which provide one implementation for scalar types and another for containers
421 // TODO these could probably all be replaced by inlined lambdas and/or constexpr if statements
422 // in c++17 or later
423
424 // return unchanged value for scalar
426 ScalarConstIterator<T> MakeBegin(const T &val)
427 {
428 return ScalarConstIterator<T>(&val);
429 }
430
431 // return iterator to beginning of container
433 auto MakeBegin(const T &val)
434 {
435 return std::begin(val);
436 }
437
438 // return 1 for scalars
440 std::size_t GetSize(const T &)
441 {
442 return 1;
443 }
444
445 // return container size
447 std::size_t GetSize(const T &val)
448 {
449#if __cplusplus >= 201703L
450 return std::size(val);
451#else
452 return val.size();
453#endif
454 }
455
456 template <std::size_t ColIdx, typename End_t, typename... Its>
457 void ExecLoop(unsigned int slot, End_t end, Its... its)
458 {
459 auto *thisSlotH = fObjects[slot];
460 // loop increments all of the iterators while leaving scalars unmodified
461 // TODO this could be simplified with fold expressions or std::apply in C++17
462 auto nop = [](auto &&...) {};
463 for (; GetNthElement<ColIdx>(its...) != end; nop(++its...)) {
464 thisSlotH->Fill(*its...);
465 }
466 }
467
468public:
469 FillHelper(FillHelper &&) = default;
470 FillHelper(const FillHelper &) = delete;
471
472 FillHelper(const std::shared_ptr<HIST> &h, const unsigned int nSlots) : fObjects(nSlots, nullptr)
473 {
474 fObjects[0] = h.get();
475 // Initialize all other slots
476 for (unsigned int i = 1; i < nSlots; ++i) {
477 fObjects[i] = new HIST(*fObjects[0]);
478 UnsetDirectoryIfPossible(fObjects[i]);
479 }
480 }
481
482 void InitTask(TTreeReader *, unsigned int) {}
483
484 // no container arguments
485 template <typename... ValTypes, std::enable_if_t<!Disjunction<IsDataContainer<ValTypes>...>::value, int> = 0>
486 auto Exec(unsigned int slot, const ValTypes &...x) -> decltype(fObjects[slot]->Fill(x...), void())
487 {
488 fObjects[slot]->Fill(x...);
489 }
490
491 // at least one container argument
492 template <typename... Xs, std::enable_if_t<Disjunction<IsDataContainer<Xs>...>::value, int> = 0>
493 auto Exec(unsigned int slot, const Xs &...xs) -> decltype(fObjects[slot]->Fill(*MakeBegin(xs)...), void())
494 {
495 // array of bools keeping track of which inputs are containers
496 constexpr std::array<bool, sizeof...(Xs)> isContainer{IsDataContainer<Xs>::value...};
497
498 // index of the first container input
499 constexpr std::size_t colidx = FindIdxTrue(isContainer);
500 // if this happens, there is a bug in the implementation
501 static_assert(colidx < sizeof...(Xs), "Error: index of collection-type argument not found.");
502
503 // get the end iterator to the first container
504 auto const xrefend = std::end(GetNthElement<colidx>(xs...));
505
506 // array of container sizes (1 for scalars)
507 std::array<std::size_t, sizeof...(xs)> sizes = {{GetSize(xs)...}};
508
509 for (std::size_t i = 0; i < sizeof...(xs); ++i) {
510 if (isContainer[i] && sizes[i] != sizes[colidx]) {
511 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
512 }
513 }
514
515 ExecLoop<colidx>(slot, xrefend, MakeBegin(xs)...);
516 }
517
518 template <typename T = HIST>
519 void Exec(...)
520 {
521 static_assert(sizeof(T) < 0,
522 "When filling an object with RDataFrame (e.g. via a Fill action) the number or types of the "
523 "columns passed did not match the signature of the object's `Fill` method.");
524 }
525
526 void Initialize() { /* noop */}
527
528 void Finalize()
529 {
530 if (fObjects.size() == 1)
531 return;
532
533 Merge(fObjects, /*toselectcorrectoverload=*/0);
534
535 // delete the copies we created for the slots other than the first
536 for (auto it = ++fObjects.begin(); it != fObjects.end(); ++it)
537 delete *it;
538 }
539
540 HIST &PartialUpdate(unsigned int slot) { return *fObjects[slot]; }
541
542 // Helper functions for RMergeableValue
543 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
544 {
545 return std::make_unique<RMergeableFill<HIST>>(*fObjects[0]);
546 }
547
548 // if the fObjects vector type is derived from TObject, return the name of the object
550 std::string GetActionName()
551 {
552 return std::string(fObjects[0]->IsA()->GetName()) + "\\n" + std::string(fObjects[0]->GetName());
553 }
554
555 // if fObjects is not derived from TObject, indicate it is some other object
557 std::string GetActionName()
558 {
559 return "Fill custom object";
560 }
561
562 template <typename H = HIST>
563 FillHelper MakeNew(void *newResult)
564 {
565 auto &result = *static_cast<std::shared_ptr<H> *>(newResult);
566 ResetIfPossible(result.get());
567 UnsetDirectoryIfPossible(result.get());
568 return FillHelper(result, fObjects.size());
569 }
570};
571
572class R__CLING_PTRCHECK(off) FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> {
573public:
574 using Result_t = ::TGraph;
575
576private:
577 std::vector<::TGraph *> fGraphs;
578
579public:
580 FillTGraphHelper(FillTGraphHelper &&) = default;
581 FillTGraphHelper(const FillTGraphHelper &) = delete;
582
583 // The last parameter is always false, as at the moment there is no way to propagate the parameter from the user to
584 // this method
585 FillTGraphHelper(const std::shared_ptr<::TGraph> &g, const unsigned int nSlots) : fGraphs(nSlots, nullptr)
586 {
587 fGraphs[0] = g.get();
588 // Initialize all other slots
589 for (unsigned int i = 1; i < nSlots; ++i) {
590 fGraphs[i] = new TGraph(*fGraphs[0]);
591 }
592 }
593
594 void Initialize() {}
595 void InitTask(TTreeReader *, unsigned int) {}
596
597 // case: both types are container types
598 template <typename X0, typename X1,
600 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
601 {
602 if (x0s.size() != x1s.size()) {
603 throw std::runtime_error("Cannot fill Graph with values in containers of different sizes.");
604 }
605 auto *thisSlotG = fGraphs[slot];
606 auto x0sIt = std::begin(x0s);
607 const auto x0sEnd = std::end(x0s);
608 auto x1sIt = std::begin(x1s);
609 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
610 thisSlotG->SetPoint(thisSlotG->GetN(), *x0sIt, *x1sIt);
611 }
612 }
613
614 // case: both types are non-container types, e.g. scalars
615 template <typename X0, typename X1,
617 void Exec(unsigned int slot, X0 x0, X1 x1)
618 {
619 auto thisSlotG = fGraphs[slot];
620 thisSlotG->SetPoint(thisSlotG->GetN(), x0, x1);
621 }
622
623 // case: types are combination of containers and non-containers
624 // this is not supported, error out
625 template <typename X0, typename X1, typename... ExtraArgsToLowerPriority>
626 void Exec(unsigned int, X0, X1, ExtraArgsToLowerPriority...)
627 {
628 throw std::runtime_error("Graph was applied to a mix of scalar values and collections. This is not supported.");
629 }
630
631 void Finalize()
632 {
633 const auto nSlots = fGraphs.size();
634 auto resGraph = fGraphs[0];
635 TList l;
636 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
637 for (unsigned int slot = 1; slot < nSlots; ++slot) {
638 l.Add(fGraphs[slot]);
639 }
640 resGraph->Merge(&l);
641 }
642
643 // Helper functions for RMergeableValue
644 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
645 {
646 return std::make_unique<RMergeableFill<Result_t>>(*fGraphs[0]);
647 }
648
649 std::string GetActionName() { return "Graph"; }
650
651 Result_t &PartialUpdate(unsigned int slot) { return *fGraphs[slot]; }
652
653 FillTGraphHelper MakeNew(void *newResult)
654 {
655 auto &result = *static_cast<std::shared_ptr<TGraph> *>(newResult);
656 result->Set(0);
657 return FillTGraphHelper(result, fGraphs.size());
658 }
659};
660
661class R__CLING_PTRCHECK(off) FillTGraphAsymmErrorsHelper
662 : public ROOT::Detail::RDF::RActionImpl<FillTGraphAsymmErrorsHelper> {
663public:
664 using Result_t = ::TGraphAsymmErrors;
665
666private:
667 std::vector<::TGraphAsymmErrors *> fGraphAsymmErrors;
668
669public:
670 FillTGraphAsymmErrorsHelper(FillTGraphAsymmErrorsHelper &&) = default;
671 FillTGraphAsymmErrorsHelper(const FillTGraphAsymmErrorsHelper &) = delete;
672
673 FillTGraphAsymmErrorsHelper(const std::shared_ptr<::TGraphAsymmErrors> &g, const unsigned int nSlots)
674 : fGraphAsymmErrors(nSlots, nullptr)
675 {
676 fGraphAsymmErrors[0] = g.get();
677 // Initialize all other slots
678 for (unsigned int i = 1; i < nSlots; ++i) {
679 fGraphAsymmErrors[i] = new TGraphAsymmErrors(*fGraphAsymmErrors[0]);
680 }
681 }
682
683 void Initialize() {}
684 void InitTask(TTreeReader *, unsigned int) {}
685
686 // case: all types are container types
687 template <
688 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
691 int> = 0>
692 void
693 Exec(unsigned int slot, const X &xs, const Y &ys, const EXL &exls, const EXH &exhs, const EYL &eyls, const EYH &eyhs)
694 {
695 if ((xs.size() != ys.size()) || (xs.size() != exls.size()) || (xs.size() != exhs.size()) ||
696 (xs.size() != eyls.size()) || (xs.size() != eyhs.size())) {
697 throw std::runtime_error("Cannot fill GraphAsymmErrors with values in containers of different sizes.");
698 }
699 auto *thisSlotG = fGraphAsymmErrors[slot];
700 auto xsIt = std::begin(xs);
701 auto ysIt = std::begin(ys);
702 auto exlsIt = std::begin(exls);
703 auto exhsIt = std::begin(exhs);
704 auto eylsIt = std::begin(eyls);
705 auto eyhsIt = std::begin(eyhs);
706 while (xsIt != std::end(xs)) {
707 const auto n = thisSlotG->GetN(); // must use the same `n` for SetPoint and SetPointError
708 thisSlotG->SetPoint(n, *xsIt++, *ysIt++);
709 thisSlotG->SetPointError(n, *exlsIt++, *exhsIt++, *eylsIt++, *eyhsIt++);
710 }
711 }
712
713 // case: all types are non-container types, e.g. scalars
714 template <
715 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
718 int> = 0>
719 void Exec(unsigned int slot, X x, Y y, EXL exl, EXH exh, EYL eyl, EYH eyh)
720 {
721 auto thisSlotG = fGraphAsymmErrors[slot];
722 const auto n = thisSlotG->GetN();
723 thisSlotG->SetPoint(n, x, y);
724 thisSlotG->SetPointError(n, exl, exh, eyl, eyh);
725 }
726
727 // case: types are combination of containers and non-containers
728 // this is not supported, error out
729 template <typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
730 typename... ExtraArgsToLowerPriority>
731 void Exec(unsigned int, X, Y, EXL, EXH, EYL, EYH, ExtraArgsToLowerPriority...)
732 {
733 throw std::runtime_error(
734 "GraphAsymmErrors was applied to a mix of scalar values and collections. This is not supported.");
735 }
736
737 void Finalize()
738 {
739 const auto nSlots = fGraphAsymmErrors.size();
740 auto resGraphAsymmErrors = fGraphAsymmErrors[0];
741 TList l;
742 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
743 for (unsigned int slot = 1; slot < nSlots; ++slot) {
744 l.Add(fGraphAsymmErrors[slot]);
745 }
746 resGraphAsymmErrors->Merge(&l);
747 }
748
749 // Helper functions for RMergeableValue
750 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
751 {
752 return std::make_unique<RMergeableFill<Result_t>>(*fGraphAsymmErrors[0]);
753 }
754
755 std::string GetActionName() { return "GraphAsymmErrors"; }
756
757 Result_t &PartialUpdate(unsigned int slot) { return *fGraphAsymmErrors[slot]; }
758
759 FillTGraphAsymmErrorsHelper MakeNew(void *newResult)
760 {
761 auto &result = *static_cast<std::shared_ptr<TGraphAsymmErrors> *>(newResult);
762 result->Set(0);
763 return FillTGraphAsymmErrorsHelper(result, fGraphAsymmErrors.size());
764 }
765};
766
767// In case of the take helper we have 4 cases:
768// 1. The column is not an RVec, the collection is not a vector
769// 2. The column is not an RVec, the collection is a vector
770// 3. The column is an RVec, the collection is not a vector
771// 4. The column is an RVec, the collection is a vector
772
/// Append `v` at the end of collection `c` via emplace_back.
/// `v` is perfectly forwarded: lvalues are copied, rvalues are moved (so move-only element types work too).
template <typename V, typename COLL>
void FillColl(V&& v, COLL& c) {
   c.emplace_back(std::forward<V>(v));
}

// Use push_back for bool since some compilers do not support emplace_back.
/// Append the boolean `v` at the end of collection `c`.
template <typename COLL>
void FillColl(bool v, COLL& c) {
   c.push_back(v);
}
783
784// Case 1.: The column is not an RVec, the collection is not a vector
785// No optimisations, no transformations: just copies.
786template <typename RealT_t, typename T, typename COLL>
787class R__CLING_PTRCHECK(off) TakeHelper : public RActionImpl<TakeHelper<RealT_t, T, COLL>> {
788 Results<std::shared_ptr<COLL>> fColls;
789
790public:
791 using ColumnTypes_t = TypeList<T>;
792 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
793 {
794 fColls.emplace_back(resultColl);
795 for (unsigned int i = 1; i < nSlots; ++i)
796 fColls.emplace_back(std::make_shared<COLL>());
797 }
798 TakeHelper(TakeHelper &&);
799 TakeHelper(const TakeHelper &) = delete;
800
801 void InitTask(TTreeReader *, unsigned int) {}
802
803 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
804
805 void Initialize() { /* noop */}
806
807 void Finalize()
808 {
809 auto rColl = fColls[0];
810 for (unsigned int i = 1; i < fColls.size(); ++i) {
811 const auto &coll = fColls[i];
812 const auto end = coll->end();
813 // Use an explicit loop here to prevent compiler warnings introduced by
814 // clang's range-based loop analysis and vector<bool> references.
815 for (auto j = coll->begin(); j != end; j++) {
816 FillColl(*j, *rColl);
817 }
818 }
819 }
820
821 COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); }
822
823 std::string GetActionName() { return "Take"; }
824
825 TakeHelper MakeNew(void *newResult)
826 {
827 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
828 result->clear();
829 return TakeHelper(result, fColls.size());
830 }
831};
832
833// Case 2.: The column is not an RVec, the collection is a vector
834// Optimisations, no transformations: just copies.
835template <typename RealT_t, typename T>
836class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, T, std::vector<T>>
837 : public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
838 Results<std::shared_ptr<std::vector<T>>> fColls;
839
840public:
841 using ColumnTypes_t = TypeList<T>;
842 TakeHelper(const std::shared_ptr<std::vector<T>> &resultColl, const unsigned int nSlots)
843 {
844 fColls.emplace_back(resultColl);
845 for (unsigned int i = 1; i < nSlots; ++i) {
846 auto v = std::make_shared<std::vector<T>>();
847 v->reserve(1024);
848 fColls.emplace_back(v);
849 }
850 }
851 TakeHelper(TakeHelper &&);
852 TakeHelper(const TakeHelper &) = delete;
853
854 void InitTask(TTreeReader *, unsigned int) {}
855
856 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
857
858 void Initialize() { /* noop */}
859
860 // This is optimised to treat vectors
861 void Finalize()
862 {
863 ULong64_t totSize = 0;
864 for (auto &coll : fColls)
865 totSize += coll->size();
866 auto rColl = fColls[0];
867 rColl->reserve(totSize);
868 for (unsigned int i = 1; i < fColls.size(); ++i) {
869 auto &coll = fColls[i];
870 rColl->insert(rColl->end(), coll->begin(), coll->end());
871 }
872 }
873
874 std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; }
875
876 std::string GetActionName() { return "Take"; }
877
878 TakeHelper MakeNew(void *newResult)
879 {
880 auto &result = *static_cast<std::shared_ptr<std::vector<T>> *>(newResult);
881 result->clear();
882 return TakeHelper(result, fColls.size());
883 }
884};
885
886// Case 3.: The column is a RVec, the collection is not a vector
887// No optimisations, transformations from RVecs to vectors
888template <typename RealT_t, typename COLL>
889class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, RVec<RealT_t>, COLL>
890 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
891 Results<std::shared_ptr<COLL>> fColls;
892
893public:
894 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
895 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
896 {
897 fColls.emplace_back(resultColl);
898 for (unsigned int i = 1; i < nSlots; ++i)
899 fColls.emplace_back(std::make_shared<COLL>());
900 }
901 TakeHelper(TakeHelper &&);
902 TakeHelper(const TakeHelper &) = delete;
903
904 void InitTask(TTreeReader *, unsigned int) {}
905
906 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
907
908 void Initialize() { /* noop */}
909
910 void Finalize()
911 {
912 auto rColl = fColls[0];
913 for (unsigned int i = 1; i < fColls.size(); ++i) {
914 auto &coll = fColls[i];
915 for (auto &v : *coll) {
916 rColl->emplace_back(v);
917 }
918 }
919 }
920
921 std::string GetActionName() { return "Take"; }
922
923 TakeHelper MakeNew(void *newResult)
924 {
925 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
926 result->clear();
927 return TakeHelper(result, fColls.size());
928 }
929};
930
931// Case 4.: The column is an RVec, the collection is a vector
932// Optimisations, transformations from RVecs to vectors
933template <typename RealT_t>
934class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>
935 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
936
937 Results<std::shared_ptr<std::vector<std::vector<RealT_t>>>> fColls;
938
939public:
940 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
941 TakeHelper(const std::shared_ptr<std::vector<std::vector<RealT_t>>> &resultColl, const unsigned int nSlots)
942 {
943 fColls.emplace_back(resultColl);
944 for (unsigned int i = 1; i < nSlots; ++i) {
945 auto v = std::make_shared<std::vector<RealT_t>>();
946 v->reserve(1024);
947 fColls.emplace_back(v);
948 }
949 }
950 TakeHelper(TakeHelper &&);
951 TakeHelper(const TakeHelper &) = delete;
952
953 void InitTask(TTreeReader *, unsigned int) {}
954
955 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
956
957 void Initialize() { /* noop */}
958
959 // This is optimised to treat vectors
960 void Finalize()
961 {
962 ULong64_t totSize = 0;
963 for (auto &coll : fColls)
964 totSize += coll->size();
965 auto rColl = fColls[0];
966 rColl->reserve(totSize);
967 for (unsigned int i = 1; i < fColls.size(); ++i) {
968 auto &coll = fColls[i];
969 rColl->insert(rColl->end(), coll->begin(), coll->end());
970 }
971 }
972
973 std::string GetActionName() { return "Take"; }
974
975 TakeHelper MakeNew(void *newResult)
976 {
977 auto &result = *static_cast<typename decltype(fColls)::value_type *>(newResult);
978 result->clear();
979 return TakeHelper(result, fColls.size());
980 }
981};
982
983// Extern templates for TakeHelper
984// NOTE: The move-constructor of specializations declared as extern templates
985// must be defined out of line, otherwise cling fails to find its symbol.
986template <typename RealT_t, typename T, typename COLL>
987TakeHelper<RealT_t, T, COLL>::TakeHelper(TakeHelper<RealT_t, T, COLL> &&) = default;
988template <typename RealT_t, typename T>
989TakeHelper<RealT_t, T, std::vector<T>>::TakeHelper(TakeHelper<RealT_t, T, std::vector<T>> &&) = default;
990template <typename RealT_t, typename COLL>
991TakeHelper<RealT_t, RVec<RealT_t>, COLL>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, COLL> &&) = default;
992template <typename RealT_t>
993TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>> &&) = default;
994
995// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
996#if __GNUC__ > 5
997extern template class TakeHelper<bool, bool, std::vector<bool>>;
998extern template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
999extern template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
1000extern template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
1001extern template class TakeHelper<int, int, std::vector<int>>;
1002extern template class TakeHelper<long, long, std::vector<long>>;
1003extern template class TakeHelper<long long, long long, std::vector<long long>>;
1004extern template class TakeHelper<float, float, std::vector<float>>;
1005extern template class TakeHelper<double, double, std::vector<double>>;
1006#endif
1007
1008template <typename ResultType>
1009class R__CLING_PTRCHECK(off) MinHelper : public RActionImpl<MinHelper<ResultType>> {
1010 const std::shared_ptr<ResultType> fResultMin;
1011 Results<ResultType> fMins;
1012
1013public:
1014 MinHelper(MinHelper &&) = default;
1015 MinHelper(const std::shared_ptr<ResultType> &minVPtr, const unsigned int nSlots)
1016 : fResultMin(minVPtr), fMins(nSlots, std::numeric_limits<ResultType>::max())
1017 {
1018 }
1019
1020 void Exec(unsigned int slot, ResultType v) { fMins[slot] = std::min(v, fMins[slot]); }
1021
1022 void InitTask(TTreeReader *, unsigned int) {}
1023
1025 void Exec(unsigned int slot, const T &vs)
1026 {
1027 for (auto &&v : vs)
1028 fMins[slot] = std::min(static_cast<ResultType>(v), fMins[slot]);
1029 }
1030
1031 void Initialize() { /* noop */}
1032
1033 void Finalize()
1034 {
1035 *fResultMin = std::numeric_limits<ResultType>::max();
1036 for (auto &m : fMins)
1037 *fResultMin = std::min(m, *fResultMin);
1038 }
1039
1040 // Helper functions for RMergeableValue
1041 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1042 {
1043 return std::make_unique<RMergeableMin<ResultType>>(*fResultMin);
1044 }
1045
1046 ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; }
1047
1048 std::string GetActionName() { return "Min"; }
1049
1050 MinHelper MakeNew(void *newResult)
1051 {
1052 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1053 return MinHelper(result, fMins.size());
1054 }
1055};
1056
1057// TODO
1058// extern template void MinHelper::Exec(unsigned int, const std::vector<float> &);
1059// extern template void MinHelper::Exec(unsigned int, const std::vector<double> &);
1060// extern template void MinHelper::Exec(unsigned int, const std::vector<char> &);
1061// extern template void MinHelper::Exec(unsigned int, const std::vector<int> &);
1062// extern template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1063
1064template <typename ResultType>
1065class R__CLING_PTRCHECK(off) MaxHelper : public RActionImpl<MaxHelper<ResultType>> {
1066 const std::shared_ptr<ResultType> fResultMax;
1067 Results<ResultType> fMaxs;
1068
1069public:
1070 MaxHelper(MaxHelper &&) = default;
1071 MaxHelper(const MaxHelper &) = delete;
1072 MaxHelper(const std::shared_ptr<ResultType> &maxVPtr, const unsigned int nSlots)
1073 : fResultMax(maxVPtr), fMaxs(nSlots, std::numeric_limits<ResultType>::lowest())
1074 {
1075 }
1076
1077 void InitTask(TTreeReader *, unsigned int) {}
1078 void Exec(unsigned int slot, ResultType v) { fMaxs[slot] = std::max(v, fMaxs[slot]); }
1079
1081 void Exec(unsigned int slot, const T &vs)
1082 {
1083 for (auto &&v : vs)
1084 fMaxs[slot] = std::max(static_cast<ResultType>(v), fMaxs[slot]);
1085 }
1086
1087 void Initialize() { /* noop */}
1088
1089 void Finalize()
1090 {
1091 *fResultMax = std::numeric_limits<ResultType>::lowest();
1092 for (auto &m : fMaxs) {
1093 *fResultMax = std::max(m, *fResultMax);
1094 }
1095 }
1096
1097 // Helper functions for RMergeableValue
1098 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1099 {
1100 return std::make_unique<RMergeableMax<ResultType>>(*fResultMax);
1101 }
1102
1103 ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; }
1104
1105 std::string GetActionName() { return "Max"; }
1106
1107 MaxHelper MakeNew(void *newResult)
1108 {
1109 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1110 return MaxHelper(result, fMaxs.size());
1111 }
1112};
1113
1114// TODO
1115// extern template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
1116// extern template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
1117// extern template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
1118// extern template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
1119// extern template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1120
1121template <typename ResultType>
1122class R__CLING_PTRCHECK(off) SumHelper : public RActionImpl<SumHelper<ResultType>> {
1123 const std::shared_ptr<ResultType> fResultSum;
1124 Results<ResultType> fSums;
1125 Results<ResultType> fCompensations;
1126
1127 /// Evaluate neutral element for this type and the sum operation.
1128 /// This is assumed to be any_value - any_value if operator- is defined
1129 /// for the type, otherwise a default-constructed ResultType{} is used.
1130 template <typename T = ResultType>
1131 auto NeutralElement(const T &v, int /*overloadresolver*/) -> decltype(v - v)
1132 {
1133 return v - v;
1134 }
1135
1136 template <typename T = ResultType, typename Dummy = int>
1137 ResultType NeutralElement(const T &, Dummy) // this overload has lower priority thanks to the template arg
1138 {
1139 return ResultType{};
1140 }
1141
1142public:
1143 SumHelper(SumHelper &&) = default;
1144 SumHelper(const SumHelper &) = delete;
1145 SumHelper(const std::shared_ptr<ResultType> &sumVPtr, const unsigned int nSlots)
1146 : fResultSum(sumVPtr), fSums(nSlots, NeutralElement(*sumVPtr, -1)),
1147 fCompensations(nSlots, NeutralElement(*sumVPtr, -1))
1148 {
1149 }
1150 void InitTask(TTreeReader *, unsigned int) {}
1151
1152 void Exec(unsigned int slot, ResultType x)
1153 {
1154 // Kahan Sum:
1155 ResultType y = x - fCompensations[slot];
1156 ResultType t = fSums[slot] + y;
1157 fCompensations[slot] = (t - fSums[slot]) - y;
1158 fSums[slot] = t;
1159 }
1160
1162 void Exec(unsigned int slot, const T &vs)
1163 {
1164 for (auto &&v : vs) {
1165 Exec(slot, v);
1166 }
1167 }
1168
1169 void Initialize() { /* noop */}
1170
1171 void Finalize()
1172 {
1173 ResultType sum(NeutralElement(ResultType{}, -1));
1174 ResultType compensation(NeutralElement(ResultType{}, -1));
1175 ResultType y(NeutralElement(ResultType{}, -1));
1176 ResultType t(NeutralElement(ResultType{}, -1));
1177 for (auto &m : fSums) {
1178 // Kahan Sum:
1179 y = m - compensation;
1180 t = sum + y;
1181 compensation = (t - sum) - y;
1182 sum = t;
1183 }
1184 *fResultSum += sum;
1185 }
1186
1187 // Helper functions for RMergeableValue
1188 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1189 {
1190 return std::make_unique<RMergeableSum<ResultType>>(*fResultSum);
1191 }
1192
1193 ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; }
1194
1195 std::string GetActionName() { return "Sum"; }
1196
1197 SumHelper MakeNew(void *newResult)
1198 {
1199 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1200 *result = NeutralElement(*result, -1);
1201 return SumHelper(result, fSums.size());
1202 }
1203};
1204
1205class R__CLING_PTRCHECK(off) MeanHelper : public RActionImpl<MeanHelper> {
1206 const std::shared_ptr<double> fResultMean;
1207 std::vector<ULong64_t> fCounts;
1208 std::vector<double> fSums;
1209 std::vector<double> fPartialMeans;
1210 std::vector<double> fCompensations;
1211
1212public:
1213 MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1214 MeanHelper(MeanHelper &&) = default;
1215 MeanHelper(const MeanHelper &) = delete;
1216 void InitTask(TTreeReader *, unsigned int) {}
1217 void Exec(unsigned int slot, double v);
1218
1220 void Exec(unsigned int slot, const T &vs)
1221 {
1222 for (auto &&v : vs) {
1223
1224 fCounts[slot]++;
1225 // Kahan Sum:
1226 double y = v - fCompensations[slot];
1227 double t = fSums[slot] + y;
1228 fCompensations[slot] = (t - fSums[slot]) - y;
1229 fSums[slot] = t;
1230 }
1231 }
1232
1233 void Initialize() { /* noop */}
1234
1235 void Finalize();
1236
1237 // Helper functions for RMergeableValue
1238 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1239 {
1240 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1241 return std::make_unique<RMergeableMean>(*fResultMean, counts);
1242 }
1243
1244 double &PartialUpdate(unsigned int slot);
1245
1246 std::string GetActionName() { return "Mean"; }
1247
1248 MeanHelper MakeNew(void *newResult)
1249 {
1250 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1251 return MeanHelper(result, fSums.size());
1252 }
1253};
1254
1255extern template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
1256extern template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
1257extern template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
1258extern template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
1259extern template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1260
1261class R__CLING_PTRCHECK(off) StdDevHelper : public RActionImpl<StdDevHelper> {
1262 // Number of subsets of data
1263 const unsigned int fNSlots;
1264 const std::shared_ptr<double> fResultStdDev;
1265 // Number of element for each slot
1266 std::vector<ULong64_t> fCounts;
1267 // Mean of each slot
1268 std::vector<double> fMeans;
1269 // Squared distance from the mean
1270 std::vector<double> fDistancesfromMean;
1271
1272public:
1273 StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1274 StdDevHelper(StdDevHelper &&) = default;
1275 StdDevHelper(const StdDevHelper &) = delete;
1276 void InitTask(TTreeReader *, unsigned int) {}
1277 void Exec(unsigned int slot, double v);
1278
1280 void Exec(unsigned int slot, const T &vs)
1281 {
1282 for (auto &&v : vs) {
1283 Exec(slot, v);
1284 }
1285 }
1286
1287 void Initialize() { /* noop */}
1288
1289 void Finalize();
1290
1291 // Helper functions for RMergeableValue
1292 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1293 {
1294 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1295 const Double_t mean =
1296 std::inner_product(fMeans.begin(), fMeans.end(), fCounts.begin(), 0.) / static_cast<Double_t>(counts);
1297 return std::make_unique<RMergeableStdDev>(*fResultStdDev, counts, mean);
1298 }
1299
1300 std::string GetActionName() { return "StdDev"; }
1301
1302 StdDevHelper MakeNew(void *newResult)
1303 {
1304 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1305 return StdDevHelper(result, fCounts.size());
1306 }
1307};
1308
1309extern template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
1310extern template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
1311extern template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
1312extern template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
1313extern template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1314
1315template <typename PrevNodeType>
1316class R__CLING_PTRCHECK(off) DisplayHelper : public RActionImpl<DisplayHelper<PrevNodeType>> {
1317private:
1319 const std::shared_ptr<Display_t> fDisplayerHelper;
1320 const std::shared_ptr<PrevNodeType> fPrevNode;
1321
1322public:
1323 DisplayHelper(const std::shared_ptr<Display_t> &d, const std::shared_ptr<PrevNodeType> &prevNode)
1324 : fDisplayerHelper(d), fPrevNode(prevNode)
1325 {
1326 }
1327 DisplayHelper(DisplayHelper &&) = default;
1328 DisplayHelper(const DisplayHelper &) = delete;
1329 void InitTask(TTreeReader *, unsigned int) {}
1330
1331 template <typename... Columns>
1332 void Exec(unsigned int, Columns &... columns)
1333 {
1334 fDisplayerHelper->AddRow(columns...);
1335 if (!fDisplayerHelper->HasNext()) {
1336 fPrevNode->StopProcessing();
1337 }
1338 }
1339
1340 void Initialize() {}
1341
1342 void Finalize() {}
1343
1344 std::string GetActionName() { return "Display"; }
1345};
1346
1347template <typename T>
1348void *GetData(ROOT::VecOps::RVec<T> &v)
1349{
1350 return v.data();
1351}
1352
/// Fallback overload for any other argument type: the argument is ignored and a null pointer is returned.
template <typename T>
auto GetData(T & /*unused*/) -> void *
{
   return nullptr;
}
1358
1359template <typename T>
1360void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &name,
1361 TBranch *&branch, void *&branchAddress, T *address, RBranchSet &outputBranches,
1362 bool /*isDefine*/)
1363{
1364 static TClassRef TBOClRef("TBranchObject");
1365
1366 TBranch *inputBranch = nullptr;
1367 if (inputTree) {
1368 inputBranch = inputTree->GetBranch(inName.c_str());
1369 if (!inputBranch) // try harder
1370 inputBranch = inputTree->FindBranch(inName.c_str());
1371 }
1372
1373 auto *outputBranch = outputBranches.Get(name);
1374 if (outputBranch) {
1375 // the output branch was already created, we just need to (re)set its address
1376 if (inputBranch && inputBranch->IsA() == TBOClRef) {
1377 outputBranch->SetAddress(reinterpret_cast<T **>(inputBranch->GetAddress()));
1378 } else if (outputBranch->IsA() != TBranch::Class()) {
1379 branchAddress = address;
1380 outputBranch->SetAddress(&branchAddress);
1381 } else {
1382 outputBranch->SetAddress(address);
1383 branchAddress = address;
1384 }
1385 return;
1386 }
1387
1388 if (inputBranch) {
1389 // Respect the original bufsize and splitlevel arguments
1390 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
1391 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
1392 // TObject branch, see https://bit.ly/2EjLMId ).
1393 const auto bufSize = inputBranch->GetBasketSize();
1394 const auto splitLevel = inputBranch->GetSplitLevel();
1395
1396 if (inputBranch->IsA() == TBOClRef) {
1397 // Need to pass a pointer to pointer
1398 outputBranch =
1399 outputTree.Branch(name.c_str(), reinterpret_cast<T **>(inputBranch->GetAddress()), bufSize, splitLevel);
1400 } else {
1401 outputBranch = outputTree.Branch(name.c_str(), address, bufSize, splitLevel);
1402 }
1403 } else {
1404 outputBranch = outputTree.Branch(name.c_str(), address);
1405 }
1406 outputBranches.Insert(name, outputBranch);
1407 // This is not an array branch, so we don't register the address of the output branch here
1408 branch = nullptr;
1409 branchAddress = nullptr;
1410}
1411
1412/// Helper function for SnapshotHelper and SnapshotHelperMT. It creates new branches for the output TTree of a Snapshot.
1413/// This overload is called for columns of type `RVec<T>`. For RDF, these can represent:
1414/// 1. c-style arrays in ROOT files, so we are sure that there are input trees to which we can ask the correct branch
1415/// title
1416/// 2. RVecs coming from a custom column or the input file/data-source
1417/// 3. vectors coming from ROOT files that are being read as RVecs
1418/// 4. TClonesArray
1419///
1420/// In case of 1., we keep aside the pointer to the branch and the pointer to the input value (in `branch` and
1421/// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch.
1422template <typename T>
1423void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &outName,
1424 TBranch *&branch, void *&branchAddress, RVec<T> *ab, RBranchSet &outputBranches, bool isDefine)
1425{
1426 TBranch *inputBranch = nullptr;
1427 if (inputTree) {
1428 inputBranch = inputTree->GetBranch(inName.c_str());
1429 if (!inputBranch) // try harder
1430 inputBranch = inputTree->FindBranch(inName.c_str());
1431 }
1432 auto *outputBranch = outputBranches.Get(outName);
1433
1434 // if no backing input branch, we must write out an RVec
1435 bool mustWriteRVec = (inputBranch == nullptr || isDefine);
1436 // otherwise, if input branch is TClonesArray, must write out an RVec
1437 if (!mustWriteRVec && std::string_view(inputBranch->GetClassName()) == "TClonesArray") {
1438 mustWriteRVec = true;
1439 Warning("Snapshot",
1440 "Branch \"%s\" contains TClonesArrays but the type specified to Snapshot was RVec<T>. The branch will "
1441 "be written out as a RVec instead of a TClonesArray. Specify that the type of the branch is "
1442 "TClonesArray as a Snapshot template parameter to write out a TClonesArray instead.",
1443 inName.c_str());
1444 }
1445 // otherwise, if input branch is a std::vector or RVec, must write out an RVec
1446 if (!mustWriteRVec) {
1447 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
1449 mustWriteRVec = true;
1450 }
1451
1452 if (mustWriteRVec) {
1453 // Treat:
1454 // 2. RVec coming from a custom column or a source
1455 // 3. RVec coming from a column on disk of type vector (the RVec is adopting the data of that vector)
1456 // 4. TClonesArray written out as RVec<T>
1457 if (outputBranch) {
1458 // needs to be SetObject (not SetAddress) to mimic what happens when this TBranchElement is constructed
1459 outputBranch->SetObject(ab);
1460 } else {
1461 auto *b = outputTree.Branch(outName.c_str(), ab);
1462 outputBranches.Insert(outName, b);
1463 }
1464 return;
1465 }
1466
1467 // else this must be a C-array, aka case 1.
1468 auto dataPtr = ab->data();
1469
1470 if (outputBranch) {
1471 if (outputBranch->IsA() != TBranch::Class()) {
1472 branchAddress = dataPtr;
1473 outputBranch->SetAddress(&branchAddress);
1474 } else {
1475 outputBranch->SetAddress(dataPtr);
1476 }
1477 } else {
1478 // must construct the leaflist for the output branch and create the branch in the output tree
1479 auto *const leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
1480 const auto bname = leaf->GetName();
1481 auto *sizeLeaf = leaf->GetLeafCount();
1482 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
1483
1484 if (sizeLeaf && !outputBranches.Get(sizeLeafName)) {
1485 // The output array branch `bname` has dynamic size stored in leaf `sizeLeafName`, but that leaf has not been
1486 // added to the output tree yet. However, the size leaf has to be available for the creation of the array
1487 // branch to be successful. So we create the size leaf here.
1488 const auto sizeTypeStr = TypeName2ROOTTypeName(sizeLeaf->GetTypeName());
1489 const auto sizeBufSize = sizeLeaf->GetBranch()->GetBasketSize();
1490 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for `sizeLeafName`
1491 auto *sizeBranch = outputTree.Branch(sizeLeafName.c_str(), (void *)nullptr,
1492 (sizeLeafName + '/' + sizeTypeStr).c_str(), sizeBufSize);
1493 outputBranches.Insert(sizeLeafName, sizeBranch);
1494 }
1495
1496 const auto btype = leaf->GetTypeName();
1497 const auto rootbtype = TypeName2ROOTTypeName(btype);
1498 if (rootbtype == ' ') {
1499 Warning("Snapshot",
1500 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. This "
1501 "column will not be written out.",
1502 bname);
1503 } else {
1504 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
1505 outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str());
1506 outputBranch->SetTitle(inputBranch->GetTitle());
1507 outputBranches.Insert(outName, outputBranch);
1508 branch = outputBranch;
1509 branchAddress = ab->data();
1510 }
1511 }
1512}
1513
1514void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName);
1515
1516/// Helper object for a single-thread Snapshot action
1517template <typename... ColTypes>
1518class R__CLING_PTRCHECK(off) SnapshotHelper : public RActionImpl<SnapshotHelper<ColTypes...>> {
1519 const std::string fFileName;
1520 const std::string fDirName;
1521 const std::string fTreeName;
1522 const RSnapshotOptions fOptions;
1523 std::unique_ptr<TFile> fOutputFile;
1524 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
1525 bool fBranchAddressesNeedReset{true};
1526 const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1527 const ColumnNames_t fOutputBranchNames;
1528 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
1529 // TODO we might be able to unify fBranches, fBranchAddresses and fOutputBranches
1530 std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
1531 std::vector<void *> fBranchAddresses; // Addresses of objects associated to output branches
1532 RBranchSet fOutputBranches;
1533 std::vector<bool> fIsDefine;
1534
1535public:
1536 using ColumnTypes_t = TypeList<ColTypes...>;
1537 SnapshotHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
1538 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
1539 std::vector<bool> &&isDefine)
1540 : fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1541 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fBranches(vbnames.size(), nullptr),
1542 fBranchAddresses(vbnames.size(), nullptr), fIsDefine(std::move(isDefine))
1543 {
1544 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1545 }
1546
1547 SnapshotHelper(const SnapshotHelper &) = delete;
1548 SnapshotHelper(SnapshotHelper &&) = default;
1549
1550 void InitTask(TTreeReader *r, unsigned int /* slot */)
1551 {
1552 if (r)
1553 fInputTree = r->GetTree();
1554 fBranchAddressesNeedReset = true;
1555 }
1556
1557 void Exec(unsigned int /* slot */, ColTypes &... values)
1558 {
1559 using ind_t = std::index_sequence_for<ColTypes...>;
1560 if (!fBranchAddressesNeedReset) {
1561 UpdateCArraysPtrs(values..., ind_t{});
1562 } else {
1563 SetBranches(values..., ind_t{});
1564 fBranchAddressesNeedReset = false;
1565 }
1566 fOutputTree->Fill();
1567 }
1568
1569 template <std::size_t... S>
1570 void UpdateCArraysPtrs(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1571 {
1572 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1573 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1574 // leaving associated to the branch of the output tree an invalid pointer.
1575 // With this code, we set the value of the pointer in the output branch anew when needed.
1576 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1577 // we need an int for the expander list.
1578 int expander[] = {(fBranches[S] && fBranchAddresses[S] != GetData(values)
1579 ? fBranches[S]->SetAddress(GetData(values)),
1580 fBranchAddresses[S] = GetData(values), 0 : 0, 0)...,
1581 0};
1582 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1583 }
1584
1585 template <std::size_t... S>
1586 void SetBranches(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1587 {
1588 // create branches in output tree
1589 int expander[] = {(SetBranchesHelper(fInputTree, *fOutputTree, fInputBranchNames[S], fOutputBranchNames[S],
1590 fBranches[S], fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S]),
1591 0)...,
1592 0};
1593 fOutputBranches.AssertNoNullBranchAddresses();
1594 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1595 }
1596
1597 void Initialize()
1598 {
1599 fOutputFile.reset(
1600 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
1602 if(!fOutputFile)
1603 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1604
1605 TDirectory *outputDir = fOutputFile.get();
1606 if (!fDirName.empty()) {
1607 TString checkupdate = fOptions.fMode;
1608 checkupdate.ToLower();
1609 if (checkupdate == "update")
1610 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
1611 else
1612 outputDir = fOutputFile->mkdir(fDirName.c_str());
1613 }
1614
1615 fOutputTree =
1616 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
1617
1618 if (fOptions.fAutoFlush)
1619 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
1620 }
1621
1622 void Finalize()
1623 {
1624 assert(fOutputTree != nullptr);
1625 assert(fOutputFile != nullptr);
1626
1627 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
1628 fOutputTree->AutoSave("flushbaskets");
1629 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
1630 fOutputTree.reset();
1631 fOutputFile->Close();
1632 }
1633
1634 std::string GetActionName() { return "Snapshot"; }
1635
1636 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1637 {
1638 return [this](unsigned int, const RSampleInfo &) mutable { fBranchAddressesNeedReset = true; };
1639 }
1640};
1641
1642/// Helper object for a multi-thread Snapshot action
1643template <typename... ColTypes>
1644class R__CLING_PTRCHECK(off) SnapshotHelperMT : public RActionImpl<SnapshotHelperMT<ColTypes...>> {
1645 const unsigned int fNSlots;
1646 std::unique_ptr<ROOT::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
1647 std::vector<std::shared_ptr<ROOT::TBufferMergerFile>> fOutputFiles;
1648 std::vector<std::unique_ptr<TTree>> fOutputTrees;
1649 std::vector<int> fBranchAddressesNeedReset; // vector<bool> does not allow concurrent writing of different elements
1650 const std::string fFileName; // name of the output file name
1651 const std::string fDirName; // name of TFile subdirectory in which output must be written (possibly empty)
1652 const std::string fTreeName; // name of output tree
1653 const RSnapshotOptions fOptions; // struct holding options to pass down to TFile and TTree in this action
1654 const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1655 const ColumnNames_t fOutputBranchNames;
1656 std::vector<TTree *> fInputTrees; // Current input trees. Set at initialization time (`InitTask`)
1657 // Addresses of branches in output per slot, non-null only for the ones holding C arrays
1658 std::vector<std::vector<TBranch *>> fBranches;
1659 // Addresses associated to output branches per slot, non-null only for the ones holding C arrays
1660 std::vector<std::vector<void *>> fBranchAddresses;
1661 std::vector<RBranchSet> fOutputBranches;
1662 std::vector<bool> fIsDefine;
1663
1664public:
1665 using ColumnTypes_t = TypeList<ColTypes...>;
1666 SnapshotHelperMT(const unsigned int nSlots, std::string_view filename, std::string_view dirname,
1667 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
1668 const RSnapshotOptions &options, std::vector<bool> &&isDefine)
1669 : fNSlots(nSlots), fOutputFiles(fNSlots), fOutputTrees(fNSlots), fBranchAddressesNeedReset(fNSlots, 1),
1670 fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1671 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fInputTrees(fNSlots),
1672 fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
1673 fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr)), fOutputBranches(fNSlots),
1674 fIsDefine(std::move(isDefine))
1675 {
1676 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1677 }
1678 SnapshotHelperMT(const SnapshotHelperMT &) = delete;
1679 SnapshotHelperMT(SnapshotHelperMT &&) = default;
1680
1681 void InitTask(TTreeReader *r, unsigned int slot)
1682 {
1683 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
1684 if (!fOutputFiles[slot]) {
1685 // first time this thread executes something, let's create a TBufferMerger output directory
1686 fOutputFiles[slot] = fMerger->GetFile();
1687 }
1688 TDirectory *treeDirectory = fOutputFiles[slot].get();
1689 if (!fDirName.empty()) {
1690 // call returnExistingDirectory=true since MT can end up making this call multiple times
1691 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
1692 }
1693 // re-create output tree as we need to create its branches again, with new input variables
1694 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
1695 fOutputTrees[slot] =
1696 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1697 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
1698 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
1699 fOutputTrees[slot]->SetImplicitMT(false);
1700 if (fOptions.fAutoFlush)
1701 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
1702 if (r) {
1703 // not an empty-source RDF
1704 fInputTrees[slot] = r->GetTree();
1705 }
1706 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
1707 }
1708
1709 void FinalizeTask(unsigned int slot)
1710 {
1711 if (fOutputTrees[slot]->GetEntries() > 0)
1712 fOutputFiles[slot]->Write();
1713 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
1714 fOutputTrees[slot].reset(nullptr);
1715 fOutputBranches[slot].Clear();
1716 }
1717
1718 void Exec(unsigned int slot, ColTypes &... values)
1719 {
1720 using ind_t = std::index_sequence_for<ColTypes...>;
1721 if (fBranchAddressesNeedReset[slot] == 0) {
1722 UpdateCArraysPtrs(slot, values..., ind_t{});
1723 } else {
1724 SetBranches(slot, values..., ind_t{});
1725 fBranchAddressesNeedReset[slot] = 0;
1726 }
1727 fOutputTrees[slot]->Fill();
1728 auto entries = fOutputTrees[slot]->GetEntries();
1729 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
1730 if ((autoFlush > 0) && (entries % autoFlush == 0))
1731 fOutputFiles[slot]->Write();
1732 }
1733
1734 template <std::size_t... S>
1735 void UpdateCArraysPtrs(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1736 {
1737 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1738 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1739 // leaving associated to the branch of the output tree an invalid pointer.
1740 // With this code, we set the value of the pointer in the output branch anew when needed.
1741 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1742 // we need an int for the expander list.
1743 int expander[] = {(fBranches[slot][S] && fBranchAddresses[slot][S] != GetData(values)
1744 ? fBranches[slot][S]->SetAddress(GetData(values)),
1745 fBranchAddresses[slot][S] = GetData(values), 0 : 0, 0)...,
1746 0};
1747 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1748 }
1749
1750 template <std::size_t... S>
1751 void SetBranches(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1752 {
1753 // hack to call TTree::Branch on all variadic template arguments
1754 int expander[] = {(SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
1755 fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S],
1756 &values, fOutputBranches[slot], fIsDefine[S]),
1757 0)...,
1758 0};
1759 fOutputBranches[slot].AssertNoNullBranchAddresses();
1760 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1761 }
1762
1763 void Initialize()
1764 {
1765 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
1766 auto out_file = TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/fFileName.c_str(), cs);
1767 if(!out_file)
1768 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1769 fMerger = std::make_unique<ROOT::TBufferMerger>(std::unique_ptr<TFile>(out_file));
1770 }
1771
1772 void Finalize()
1773 {
1774 assert(std::any_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &ptr) { return ptr != nullptr; }));
1775
1776 auto fileWritten = false;
1777 for (auto &file : fOutputFiles) {
1778 if (file) {
1779 file->Write();
1780 file->Close();
1781 fileWritten = true;
1782 }
1783 }
1784
1785 if (!fileWritten) {
1786 Warning("Snapshot",
1787 "No input entries (input TTree was empty or no entry passed the Filters). Output TTree is empty.");
1788 }
1789
1790 // flush all buffers to disk by destroying the TBufferMerger
1791 fOutputFiles.clear();
1792 fMerger.reset();
1793 }
1794
/// Return the name identifying this action, "Snapshot".
std::string GetActionName()
{
   return std::string("Snapshot");
}
1796
1797 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1798 {
1799 return [this](unsigned int slot, const RSampleInfo &) mutable { fBranchAddressesNeedReset[slot] = 1; };
1800 }
1801};
1802
1803template <typename Acc, typename Merge, typename R, typename T, typename U,
1804 bool MustCopyAssign = std::is_same<R, U>::value>
1805class R__CLING_PTRCHECK(off) AggregateHelper
1806 : public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1807 Acc fAggregate;
1808 Merge fMerge;
1809 const std::shared_ptr<U> fResult;
1810 Results<U> fAggregators;
1811
1812public:
1813 using ColumnTypes_t = TypeList<T>;
1814
1815 AggregateHelper(Acc &&f, Merge &&m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1816 : fAggregate(std::move(f)), fMerge(std::move(m)), fResult(result), fAggregators(nSlots, *result)
1817 {
1818 }
1819
1820 AggregateHelper(Acc &f, Merge &m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1821 : fAggregate(f), fMerge(m), fResult(result), fAggregators(nSlots, *result)
1822 {
1823 }
1824
1825 AggregateHelper(AggregateHelper &&) = default;
1826 AggregateHelper(const AggregateHelper &) = delete;
1827
1828 void InitTask(TTreeReader *, unsigned int) {}
1829
1830 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<MustCopyAssign_, int> = 0>
1831 void Exec(unsigned int slot, const T &value)
1832 {
1833 fAggregators[slot] = fAggregate(fAggregators[slot], value);
1834 }
1835
1836 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<!MustCopyAssign_, int> = 0>
1837 void Exec(unsigned int slot, const T &value)
1838 {
1839 fAggregate(fAggregators[slot], value);
1840 }
1841
1842 void Initialize() { /* noop */}
1843
1844 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1846 std::enable_if_t<MergeAll, void> Finalize()
1847 {
1848 fMerge(fAggregators);
1849 *fResult = fAggregators[0];
1850 }
1851
1852 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1853 bool MergeTwoByTwo = std::is_same<U, MergeRet>::value>
1854 std::enable_if_t<MergeTwoByTwo, void> Finalize(...) // ... needed to let compiler distinguish overloads
1855 {
1856 for (const auto &acc : fAggregators)
1857 *fResult = fMerge(*fResult, acc);
1858 }
1859
1860 U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; }
1861
1862 std::string GetActionName() { return "Aggregate"; }
1863
1864 AggregateHelper MakeNew(void *newResult)
1865 {
1866 auto &result = *static_cast<std::shared_ptr<U> *>(newResult);
1867 return AggregateHelper(fAggregate, fMerge, result, fAggregators.size());
1868 }
1869};
1870
1871} // end of NS RDF
1872} // end of NS Internal
1873} // end of NS ROOT
1874
1875/// \endcond
1876
1877#endif
PyObject * fCallable
Definition: CPPOverload.cxx:41
Handle_t Display_t
Display handle.
Definition: GuiTypes.h:27
#define d(i)
Definition: RSha256.hxx:102
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
double Double_t
Definition: RtypesCore.h:59
unsigned long long ULong64_t
Definition: RtypesCore.h:81
#define R__CLING_PTRCHECK(ONOFF)
Definition: Rtypes.h:498
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition: TError.cxx:231
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t b
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char x1
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t g
char name[80]
Definition: TGX11.cxx:110
TClass * IsA() const override
Definition: TStringLong.h:20
TTime operator*(const TTime &t1, const TTime &t2)
Definition: TTime.h:85
Base class for action helpers, see RInterface::Book() for more information.
Definition: RActionImpl.hxx:26
pointer data() noexcept
Return a pointer to the vector's buffer, even if empty().
Definition: RVec.hxx:279
This class is the textual representation of the content of a columnar dataset.
Definition: RDisplay.hxx:64
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
Definition: RSampleInfo.hxx:32
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
Definition: TypeTraits.hxx:169
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition: RVec.hxx:1456
A TTree is a list of TBranches.
Definition: TBranch.h:89
virtual const char * GetClassName() const
Return the name of the user class whose content is stored in this branch, if any.
Definition: TBranch.cxx:1322
virtual char * GetAddress() const
Definition: TBranch.h:208
static TClass * Class()
Int_t GetSplitLevel() const
Definition: TBranch.h:246
TClass * IsA() const override
Definition: TBranch.h:291
virtual Int_t GetBasketSize() const
Definition: TBranch.h:213
TObjArray * GetListOfLeaves()
Definition: TBranch.h:243
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:28
Collection abstract base class.
Definition: TCollection.h:65
TDirectory::TContext keeps track and restore the current directory.
Definition: TDirectory.h:89
Describe directory structure in memory.
Definition: TDirectory.h:45
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:4019
TGraph with asymmetric error bars.
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:617
TH1 is the base class of all histogram classes in ROOT.
Definition: TH1.h:58
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition: TLeaf.h:57
A doubly linked list.
Definition: TList.h:38
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
const char * GetName() const override
Returns name of object.
Definition: TNamed.h:47
const char * GetTitle() const override
Returns title of object.
Definition: TNamed.h:48
TObject * UncheckedAt(Int_t i) const
Definition: TObjArray.h:84
Statistical variable, defined by its mean and variance (RMS).
Definition: TStatistic.h:33
Basic string class.
Definition: TString.h:136
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1155
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:44
A TTree represents a columnar dataset.
Definition: TTree.h:79
virtual TBranch * FindBranch(const char *name)
Return the branch that correspond to the path 'branchname', which can include the name of the tree or...
Definition: TTree.cxx:4809
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
Definition: TTree.cxx:5262
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition: TTree.h:350
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition: TTree.h:258
RooCmdArg Columns(Int_t ncol)
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
#define F(x, y, z)
#define H(x, y, z)
CPYCPPYY_EXTERN bool Exec(const std::string &cmd)
Definition: API.cxx:333
std::unique_ptr< RMergeableVariations< T > > GetMergeableValue(ROOT::RDF::Experimental::RResultMap< T > &rmap)
Retrieve mergeable values after calling ROOT::RDF::VariationsFor .
Definition: RResultMap.hxx:174
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition: RDFUtils.cxx:296
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition: RDFUtils.cxx:252
constexpr std::size_t FindIdxTrue(const T &arr)
Definition: Utils.hxx:229
void(off) SmallVectorTemplateBase< T
double T(double x)
Definition: ChebyshevPol.h:34
double inner_product(const LAVector &, const LAVector &)
std::vector< std::string > ColumnNames_t
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with a RDataFrame computation graph via e....
Definition: RSampleInfo.hxx:84
ROOT type_traits extensions.
Definition: TypeTraits.hxx:21
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
@ kROOTRVec
Definition: ESTLType.h:46
@ kSTLvector
Definition: ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
RooArgSet S(Args_t &&... args)
Definition: RooArgSet.h:240
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
Definition: TClassEdit.cxx:528
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
Definition: file.py:1
static constexpr bool value
Definition: Utils.hxx:101
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
int fCompressionLevel
Compression level of output file.
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:25
TMarker m
Definition: textangle.C:8
TLine l
Definition: textangle.C:4