Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ActionHelpers.hxx
Go to the documentation of this file.
1/**
2 \file ROOT/RDF/ActionHelpers.hxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \date 2020-06
9*/
10
11/*************************************************************************
12 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
13 * All rights reserved. *
14 * *
15 * For the licensing terms see $ROOTSYS/LICENSE. *
16 * For the list of contributors see $ROOTSYS/README/CREDITS. *
17 *************************************************************************/
18
19#ifndef ROOT_RDFOPERATIONS
20#define ROOT_RDFOPERATIONS
21
22#include "Compression.h"
23#include <string_view>
24#include "ROOT/RVec.hxx"
25#include "ROOT/TBufferMerger.hxx" // for SnapshotHelper
28#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/TypeTraits.hxx"
31#include "ROOT/RDF/RDisplay.hxx"
32#include "RtypesCore.h"
33#include "TBranch.h"
34#include "TClassEdit.h"
35#include "TClassRef.h"
36#include "TDirectory.h"
37#include "TError.h" // for R__ASSERT, Warning
38#include "TFile.h" // for SnapshotHelper
39#include "TH1.h"
40#include "TGraph.h"
41#include "TGraphAsymmErrors.h"
42#include "TLeaf.h"
43#include "TObject.h"
44#include "TTree.h"
45#include "TTreeReader.h" // for SnapshotHelper
46#include "TStatistic.h"
49
50#include <algorithm>
51#include <functional>
52#include <limits>
53#include <memory>
54#include <stdexcept>
55#include <string>
56#include <type_traits>
57#include <utility> // std::index_sequence
58#include <vector>
59#include <iomanip>
60#include <numeric> // std::accumulate in MeanHelper
61
62/// \cond HIDDEN_SYMBOLS
63
64namespace ROOT {
65namespace Internal {
66namespace RDF {
67using namespace ROOT::TypeTraits;
68using namespace ROOT::VecOps;
69using namespace ROOT::RDF;
70using namespace ROOT::Detail::RDF;
71
72using Hist_t = ::TH1D;
73
74class RBranchSet {
75 std::vector<TBranch *> fBranches;
76 std::vector<std::string> fNames;
77
78public:
79 TBranch *Get(const std::string &name) const
80 {
81 auto it = std::find(fNames.begin(), fNames.end(), name);
82 if (it == fNames.end())
83 return nullptr;
84 return fBranches[std::distance(fNames.begin(), it)];
85 }
86
87 void Insert(const std::string &name, TBranch *address)
88 {
89 if (address == nullptr) {
90 throw std::logic_error("Trying to insert a null branch address.");
91 }
92 if (std::find(fBranches.begin(), fBranches.end(), address) != fBranches.end()) {
93 throw std::logic_error("Trying to insert a branch address that's already present.");
94 }
95 if (std::find(fNames.begin(), fNames.end(), name) != fNames.end()) {
96 throw std::logic_error("Trying to insert a branch name that's already present.");
97 }
98 fNames.emplace_back(name);
99 fBranches.emplace_back(address);
100 }
101
102 void Clear()
103 {
104 fBranches.clear();
105 fNames.clear();
106 }
107
108 void AssertNoNullBranchAddresses()
109 {
110 std::vector<TBranch *> branchesWithNullAddress;
111 std::copy_if(fBranches.begin(), fBranches.end(), std::back_inserter(branchesWithNullAddress),
112 [](TBranch *b) { return b->GetAddress() == nullptr; });
113
114 if (branchesWithNullAddress.empty())
115 return;
116
117 // otherwise build error message and throw
118 std::vector<std::string> missingBranchNames;
119 std::transform(branchesWithNullAddress.begin(), branchesWithNullAddress.end(),
120 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
121 std::string msg = "RDataFrame::Snapshot:";
122 if (missingBranchNames.size() == 1) {
123 msg += " branch " + missingBranchNames[0] +
124 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
125 "it is";
126 } else {
127 msg += " branches ";
128 for (const auto &bName : missingBranchNames)
129 msg += bName + ", ";
130 msg.resize(msg.size() - 2); // remove last ", "
131 msg +=
132 " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
133 }
134 msg += " not part of the set of branches that are being written out.";
135 throw std::runtime_error(msg);
136 }
137};
138
/// The container type for each thread's partial result in an action helper.
// std::vector<bool> must not be instantiated: it would make it impossible to return a reference to one of the
// thread-local results, so a std::deque is used for bool instead. In addition, having a single definition for
// the container type makes it easy to swap the underlying container, e.g. if problems with false sharing of the
// thread-local results show up.
template <typename T>
using Results = typename std::conditional<std::is_same<T, bool>::value, std::deque<T>, std::vector<T>>::type;
145
146template <typename F>
147class R__CLING_PTRCHECK(off) ForeachSlotHelper : public RActionImpl<ForeachSlotHelper<F>> {
148 F fCallable;
149
150public:
152 ForeachSlotHelper(F &&f) : fCallable(f) {}
153 ForeachSlotHelper(ForeachSlotHelper &&) = default;
154 ForeachSlotHelper(const ForeachSlotHelper &) = delete;
155
156 void InitTask(TTreeReader *, unsigned int) {}
157
158 template <typename... Args>
159 void Exec(unsigned int slot, Args &&... args)
160 {
161 // check that the decayed types of Args are the same as the branch types
162 static_assert(std::is_same<TypeList<std::decay_t<Args>...>, ColumnTypes_t>::value, "");
163 fCallable(slot, std::forward<Args>(args)...);
164 }
165
166 void Initialize() { /* noop */}
167
168 void Finalize() { /* noop */}
169
170 std::string GetActionName() { return "ForeachSlot"; }
171};
172
173class R__CLING_PTRCHECK(off) CountHelper : public RActionImpl<CountHelper> {
174 std::shared_ptr<ULong64_t> fResultCount;
175 Results<ULong64_t> fCounts;
176
177public:
178 using ColumnTypes_t = TypeList<>;
179 CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots);
180 CountHelper(CountHelper &&) = default;
181 CountHelper(const CountHelper &) = delete;
182 void InitTask(TTreeReader *, unsigned int) {}
183 void Exec(unsigned int slot);
184 void Initialize() { /* noop */}
185 void Finalize();
186
187 // Helper functions for RMergeableValue
188 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
189 {
190 return std::make_unique<RMergeableCount>(*fResultCount);
191 }
192
193 ULong64_t &PartialUpdate(unsigned int slot);
194
195 std::string GetActionName() { return "Count"; }
196
197 CountHelper MakeNew(void *newResult)
198 {
199 auto &result = *static_cast<std::shared_ptr<ULong64_t> *>(newResult);
200 return CountHelper(result, fCounts.size());
201 }
202};
203
204template <typename RNode_t>
205class R__CLING_PTRCHECK(off) ReportHelper : public RActionImpl<ReportHelper<RNode_t>> {
206 std::shared_ptr<RCutFlowReport> fReport;
207 /// Non-owning pointer, never null. As usual, the node is owned by its children nodes (and therefore indirectly by
208 /// the RAction corresponding to this action helper).
209 RNode_t *fNode;
210 bool fReturnEmptyReport;
211
212public:
213 using ColumnTypes_t = TypeList<>;
214 ReportHelper(const std::shared_ptr<RCutFlowReport> &report, RNode_t *node, bool emptyRep)
215 : fReport(report), fNode(node), fReturnEmptyReport(emptyRep){};
216 ReportHelper(ReportHelper &&) = default;
217 ReportHelper(const ReportHelper &) = delete;
218 void InitTask(TTreeReader *, unsigned int) {}
219 void Exec(unsigned int /* slot */) {}
220 void Initialize() { /* noop */}
221 void Finalize()
222 {
223 if (!fReturnEmptyReport)
224 fNode->Report(*fReport);
225 }
226
227 std::string GetActionName() { return "Report"; }
228
229 // TODO implement MakeNew. Requires some smartness in passing the appropriate previous node.
230};
231
232/// This helper fills TH1Ds for which no axes were specified by buffering the fill values to pick good axes limits.
233///
234/// TH1Ds have an automatic mechanism to pick good limits based on the first N entries they were filled with, but
235/// that does not work in multi-thread event loops as it might yield histograms with incompatible binning in each
236/// thread, making it impossible to merge the per-thread results.
237/// Instead, this helper delays the decision on the axes limits until all threads have done processing, synchronizing
238/// the decision on the limits as part of the merge operation.
239class R__CLING_PTRCHECK(off) BufferedFillHelper : public RActionImpl<BufferedFillHelper> {
240 // this sets a total initial size of 16 MB for the buffers (can increase)
241 static constexpr unsigned int fgTotalBufSize = 2097152;
242 using BufEl_t = double;
243 using Buf_t = std::vector<BufEl_t>;
244
245 std::vector<Buf_t> fBuffers;
246 std::vector<Buf_t> fWBuffers;
247 std::shared_ptr<Hist_t> fResultHist;
248 unsigned int fNSlots;
249 unsigned int fBufSize;
250 /// Histograms containing "snapshots" of partial results. Non-null only if a registered callback requires it.
251 Results<std::unique_ptr<Hist_t>> fPartialHists;
252 Buf_t fMin;
253 Buf_t fMax;
254
255 void UpdateMinMax(unsigned int slot, double v);
256
257public:
258 BufferedFillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots);
259 BufferedFillHelper(BufferedFillHelper &&) = default;
260 BufferedFillHelper(const BufferedFillHelper &) = delete;
261 void InitTask(TTreeReader *, unsigned int) {}
262 void Exec(unsigned int slot, double v);
263 void Exec(unsigned int slot, double v, double w);
264
265 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
266 void Exec(unsigned int slot, const T &vs)
267 {
268 auto &thisBuf = fBuffers[slot];
269 // range-based for results in warnings on some compilers due to vector<bool>'s custom reference type
270 for (auto v = vs.begin(); v != vs.end(); ++v) {
271 UpdateMinMax(slot, *v);
272 thisBuf.emplace_back(*v); // TODO: Can be optimised in case T == BufEl_t
273 }
274 }
275
276 template <typename T, typename W, std::enable_if_t<IsDataContainer<T>::value && IsDataContainer<W>::value, int> = 0>
277 void Exec(unsigned int slot, const T &vs, const W &ws)
278 {
279 auto &thisBuf = fBuffers[slot];
280
281 for (auto &v : vs) {
282 UpdateMinMax(slot, v);
283 thisBuf.emplace_back(v);
284 }
285
286 auto &thisWBuf = fWBuffers[slot];
287 for (auto &w : ws) {
288 thisWBuf.emplace_back(w); // TODO: Can be optimised in case T == BufEl_t
289 }
290 }
291
292 template <typename T, typename W, std::enable_if_t<IsDataContainer<T>::value && !IsDataContainer<W>::value, int> = 0>
293 void Exec(unsigned int slot, const T &vs, const W w)
294 {
295 auto &thisBuf = fBuffers[slot];
296 for (auto &v : vs) {
297 UpdateMinMax(slot, v);
298 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
299 }
300
301 auto &thisWBuf = fWBuffers[slot];
302 thisWBuf.insert(thisWBuf.end(), vs.size(), w);
303 }
304
305 template <typename T, typename W, std::enable_if_t<IsDataContainer<W>::value && !IsDataContainer<T>::value, int> = 0>
306 void Exec(unsigned int slot, const T v, const W &ws)
307 {
308 UpdateMinMax(slot, v);
309 auto &thisBuf = fBuffers[slot];
310 thisBuf.insert(thisBuf.end(), ws.size(), v);
311
312 auto &thisWBuf = fWBuffers[slot];
313 thisWBuf.insert(thisWBuf.end(), ws.begin(), ws.end());
314 }
315
316 Hist_t &PartialUpdate(unsigned int);
317
318 void Initialize() { /* noop */}
319
320 void Finalize();
321
322 // Helper functions for RMergeableValue
323 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
324 {
325 return std::make_unique<RMergeableFill<Hist_t>>(*fResultHist);
326 }
327
328 std::string GetActionName()
329 {
330 return std::string(fResultHist->IsA()->GetName()) + "\\n" + std::string(fResultHist->GetName());
331 }
332
333 BufferedFillHelper MakeNew(void *newResult)
334 {
335 auto &result = *static_cast<std::shared_ptr<Hist_t> *>(newResult);
336 result->Reset();
337 result->SetDirectory(nullptr);
338 return BufferedFillHelper(result, fNSlots);
339 }
340};
341
342/// The generic Fill helper: it calls Fill on per-thread objects and then Merge to produce a final result.
343/// For one-dimensional histograms, if no axes are specified, RDataFrame uses BufferedFillHelper instead.
344template <typename HIST = Hist_t>
345class R__CLING_PTRCHECK(off) FillHelper : public RActionImpl<FillHelper<HIST>> {
346 std::vector<HIST *> fObjects;
347
348 template <typename H = HIST, typename = decltype(std::declval<H>().Reset())>
349 void ResetIfPossible(H *h)
350 {
351 h->Reset();
352 }
353
354 void ResetIfPossible(TStatistic *h) { *h = TStatistic(); }
355
356 // cannot safely re-initialize variations of the result, hence error out
357 void ResetIfPossible(...)
358 {
359 throw std::runtime_error(
360 "A systematic variation was requested for a custom Fill action, but the type of the object to be filled does "
361 "not implement a Reset method, so we cannot safely re-initialize variations of the result. Aborting.");
362 }
363
364 void UnsetDirectoryIfPossible(TH1 *h) {
365 h->SetDirectory(nullptr);
366 }
367
368 void UnsetDirectoryIfPossible(...) {}
369
370 // Merge overload for types with Merge(TCollection*), like TH1s
371 template <typename H, typename = std::enable_if_t<std::is_base_of<TObject, H>::value, int>>
372 auto Merge(std::vector<H *> &objs, int /*toincreaseoverloadpriority*/)
373 -> decltype(objs[0]->Merge((TCollection *)nullptr), void())
374 {
375 TList l;
376 for (auto it = ++objs.begin(); it != objs.end(); ++it)
377 l.Add(*it);
378 objs[0]->Merge(&l);
379 }
380
381 // Merge overload for types with Merge(const std::vector&)
382 template <typename H>
383 auto Merge(std::vector<H *> &objs, double /*toloweroverloadpriority*/)
384 -> decltype(objs[0]->Merge(std::vector<HIST *>{}), void())
385 {
386 objs[0]->Merge({++objs.begin(), objs.end()});
387 }
388
389 // Merge overload to error out in case no valid HIST::Merge method was detected
390 template <typename T>
391 void Merge(T, ...)
392 {
393 static_assert(sizeof(T) < 0,
394 "The type passed to Fill does not provide a Merge(TCollection*) or Merge(const std::vector&) method.");
395 }
396
397 // class which wraps a pointer and implements a no-op increment operator
398 template <typename T>
399 class ScalarConstIterator {
400 const T *obj_;
401
402 public:
403 ScalarConstIterator(const T *obj) : obj_(obj) {}
404 const T &operator*() const { return *obj_; }
405 ScalarConstIterator<T> &operator++() { return *this; }
406 };
407
408 // helper functions which provide one implementation for scalar types and another for containers
409 // TODO these could probably all be replaced by inlined lambdas and/or constexpr if statements
410 // in c++17 or later
411
412 // return unchanged value for scalar
413 template <typename T, std::enable_if_t<!IsDataContainer<T>::value, int> = 0>
414 ScalarConstIterator<T> MakeBegin(const T &val)
415 {
416 return ScalarConstIterator<T>(&val);
417 }
418
419 // return iterator to beginning of container
420 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
421 auto MakeBegin(const T &val)
422 {
423 return std::begin(val);
424 }
425
426 // return 1 for scalars
427 template <typename T, std::enable_if_t<!IsDataContainer<T>::value, int> = 0>
428 std::size_t GetSize(const T &)
429 {
430 return 1;
431 }
432
433 // return container size
434 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
435 std::size_t GetSize(const T &val)
436 {
437#if __cplusplus >= 201703L
438 return std::size(val);
439#else
440 return val.size();
441#endif
442 }
443
444 template <std::size_t ColIdx, typename End_t, typename... Its>
445 void ExecLoop(unsigned int slot, End_t end, Its... its)
446 {
447 auto *thisSlotH = fObjects[slot];
448 // loop increments all of the iterators while leaving scalars unmodified
449 // TODO this could be simplified with fold expressions or std::apply in C++17
450 auto nop = [](auto &&...) {};
451 for (; GetNthElement<ColIdx>(its...) != end; nop(++its...)) {
452 thisSlotH->Fill(*its...);
453 }
454 }
455
456public:
457 FillHelper(FillHelper &&) = default;
458 FillHelper(const FillHelper &) = delete;
459
460 FillHelper(const std::shared_ptr<HIST> &h, const unsigned int nSlots) : fObjects(nSlots, nullptr)
461 {
462 fObjects[0] = h.get();
463 // Initialize all other slots
464 for (unsigned int i = 1; i < nSlots; ++i) {
465 fObjects[i] = new HIST(*fObjects[0]);
466 UnsetDirectoryIfPossible(fObjects[i]);
467 }
468 }
469
470 void InitTask(TTreeReader *, unsigned int) {}
471
472 // no container arguments
473 template <typename... ValTypes, std::enable_if_t<!Disjunction<IsDataContainer<ValTypes>...>::value, int> = 0>
474 auto Exec(unsigned int slot, const ValTypes &...x) -> decltype(fObjects[slot]->Fill(x...), void())
475 {
476 fObjects[slot]->Fill(x...);
477 }
478
479 // at least one container argument
480 template <typename... Xs, std::enable_if_t<Disjunction<IsDataContainer<Xs>...>::value, int> = 0>
481 auto Exec(unsigned int slot, const Xs &...xs) -> decltype(fObjects[slot]->Fill(*MakeBegin(xs)...), void())
482 {
483 // array of bools keeping track of which inputs are containers
484 constexpr std::array<bool, sizeof...(Xs)> isContainer{IsDataContainer<Xs>::value...};
485
486 // index of the first container input
487 constexpr std::size_t colidx = FindIdxTrue(isContainer);
488 // if this happens, there is a bug in the implementation
489 static_assert(colidx < sizeof...(Xs), "Error: index of collection-type argument not found.");
490
491 // get the end iterator to the first container
492 auto const xrefend = std::end(GetNthElement<colidx>(xs...));
493
494 // array of container sizes (1 for scalars)
495 std::array<std::size_t, sizeof...(xs)> sizes = {{GetSize(xs)...}};
496
497 for (std::size_t i = 0; i < sizeof...(xs); ++i) {
498 if (isContainer[i] && sizes[i] != sizes[colidx]) {
499 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
500 }
501 }
502
503 ExecLoop<colidx>(slot, xrefend, MakeBegin(xs)...);
504 }
505
506 template <typename T = HIST>
507 void Exec(...)
508 {
509 static_assert(sizeof(T) < 0,
510 "When filling an object with RDataFrame (e.g. via a Fill action) the number or types of the "
511 "columns passed did not match the signature of the object's `Fill` method.");
512 }
513
514 void Initialize() { /* noop */}
515
516 void Finalize()
517 {
518 if (fObjects.size() == 1)
519 return;
520
521 Merge(fObjects, /*toselectcorrectoverload=*/0);
522
523 // delete the copies we created for the slots other than the first
524 for (auto it = ++fObjects.begin(); it != fObjects.end(); ++it)
525 delete *it;
526 }
527
528 HIST &PartialUpdate(unsigned int slot) { return *fObjects[slot]; }
529
530 // Helper functions for RMergeableValue
531 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
532 {
533 return std::make_unique<RMergeableFill<HIST>>(*fObjects[0]);
534 }
535
536 // if the fObjects vector type is derived from TObject, return the name of the object
537 template <typename T = HIST, std::enable_if_t<std::is_base_of<TObject, T>::value, int> = 0>
538 std::string GetActionName()
539 {
540 return std::string(fObjects[0]->IsA()->GetName()) + "\\n" + std::string(fObjects[0]->GetName());
541 }
542
543 // if fObjects is not derived from TObject, indicate it is some other object
544 template <typename T = HIST, std::enable_if_t<!std::is_base_of<TObject, T>::value, int> = 0>
545 std::string GetActionName()
546 {
547 return "Fill custom object";
548 }
549
550 template <typename H = HIST>
551 FillHelper MakeNew(void *newResult)
552 {
553 auto &result = *static_cast<std::shared_ptr<H> *>(newResult);
554 ResetIfPossible(result.get());
555 UnsetDirectoryIfPossible(result.get());
556 return FillHelper(result, fObjects.size());
557 }
558};
559
560class R__CLING_PTRCHECK(off) FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> {
561public:
562 using Result_t = ::TGraph;
563
564private:
565 std::vector<::TGraph *> fGraphs;
566
567public:
568 FillTGraphHelper(FillTGraphHelper &&) = default;
569 FillTGraphHelper(const FillTGraphHelper &) = delete;
570
571 FillTGraphHelper(const std::shared_ptr<::TGraph> &g, const unsigned int nSlots) : fGraphs(nSlots, nullptr)
572 {
573 fGraphs[0] = g.get();
574 // Initialize all other slots
575 for (unsigned int i = 1; i < nSlots; ++i) {
576 fGraphs[i] = new TGraph(*fGraphs[0]);
577 }
578 }
579
580 void Initialize() {}
581 void InitTask(TTreeReader *, unsigned int) {}
582
583 // case: both types are container types
584 template <typename X0, typename X1,
585 std::enable_if_t<IsDataContainer<X0>::value && IsDataContainer<X1>::value, int> = 0>
586 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
587 {
588 if (x0s.size() != x1s.size()) {
589 throw std::runtime_error("Cannot fill Graph with values in containers of different sizes.");
590 }
591 auto *thisSlotG = fGraphs[slot];
592 auto x0sIt = std::begin(x0s);
593 const auto x0sEnd = std::end(x0s);
594 auto x1sIt = std::begin(x1s);
595 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
596 thisSlotG->SetPoint(thisSlotG->GetN(), *x0sIt, *x1sIt);
597 }
598 }
599
600 // case: both types are non-container types, e.g. scalars
601 template <typename X0, typename X1,
602 std::enable_if_t<!IsDataContainer<X0>::value && !IsDataContainer<X1>::value, int> = 0>
603 void Exec(unsigned int slot, X0 x0, X1 x1)
604 {
605 auto thisSlotG = fGraphs[slot];
606 thisSlotG->SetPoint(thisSlotG->GetN(), x0, x1);
607 }
608
609 // case: types are combination of containers and non-containers
610 // this is not supported, error out
611 template <typename X0, typename X1, typename... ExtraArgsToLowerPriority>
612 void Exec(unsigned int, X0, X1, ExtraArgsToLowerPriority...)
613 {
614 throw std::runtime_error("Graph was applied to a mix of scalar values and collections. This is not supported.");
615 }
616
617 void Finalize()
618 {
619 const auto nSlots = fGraphs.size();
620 auto resGraph = fGraphs[0];
621 TList l;
622 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
623 for (unsigned int slot = 1; slot < nSlots; ++slot) {
624 l.Add(fGraphs[slot]);
625 }
626 resGraph->Merge(&l);
627 }
628
629 // Helper functions for RMergeableValue
630 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
631 {
632 return std::make_unique<RMergeableFill<Result_t>>(*fGraphs[0]);
633 }
634
635 std::string GetActionName() { return "Graph"; }
636
637 Result_t &PartialUpdate(unsigned int slot) { return *fGraphs[slot]; }
638
639 FillTGraphHelper MakeNew(void *newResult)
640 {
641 auto &result = *static_cast<std::shared_ptr<TGraph> *>(newResult);
642 result->Set(0);
643 return FillTGraphHelper(result, fGraphs.size());
644 }
645};
646
647class R__CLING_PTRCHECK(off) FillTGraphAsymmErrorsHelper
648 : public ROOT::Detail::RDF::RActionImpl<FillTGraphAsymmErrorsHelper> {
649public:
650 using Result_t = ::TGraphAsymmErrors;
651
652private:
653 std::vector<::TGraphAsymmErrors *> fGraphAsymmErrors;
654
655public:
656 FillTGraphAsymmErrorsHelper(FillTGraphAsymmErrorsHelper &&) = default;
657 FillTGraphAsymmErrorsHelper(const FillTGraphAsymmErrorsHelper &) = delete;
658
659 FillTGraphAsymmErrorsHelper(const std::shared_ptr<::TGraphAsymmErrors> &g, const unsigned int nSlots)
660 : fGraphAsymmErrors(nSlots, nullptr)
661 {
662 fGraphAsymmErrors[0] = g.get();
663 // Initialize all other slots
664 for (unsigned int i = 1; i < nSlots; ++i) {
665 fGraphAsymmErrors[i] = new TGraphAsymmErrors(*fGraphAsymmErrors[0]);
666 }
667 }
668
669 void Initialize() {}
670 void InitTask(TTreeReader *, unsigned int) {}
671
672 // case: all types are container types
673 template <
674 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
675 std::enable_if_t<IsDataContainer<X>::value && IsDataContainer<Y>::value && IsDataContainer<EXL>::value &&
676 IsDataContainer<EXH>::value && IsDataContainer<EYL>::value && IsDataContainer<EYH>::value,
677 int> = 0>
678 void
679 Exec(unsigned int slot, const X &xs, const Y &ys, const EXL &exls, const EXH &exhs, const EYL &eyls, const EYH &eyhs)
680 {
681 if ((xs.size() != ys.size()) || (xs.size() != exls.size()) || (xs.size() != exhs.size()) ||
682 (xs.size() != eyls.size()) || (xs.size() != eyhs.size())) {
683 throw std::runtime_error("Cannot fill GraphAsymmErrors with values in containers of different sizes.");
684 }
685 auto *thisSlotG = fGraphAsymmErrors[slot];
686 auto xsIt = std::begin(xs);
687 auto ysIt = std::begin(ys);
688 auto exlsIt = std::begin(exls);
689 auto exhsIt = std::begin(exhs);
690 auto eylsIt = std::begin(eyls);
691 auto eyhsIt = std::begin(eyhs);
692 while (xsIt != std::end(xs)) {
693 const auto n = thisSlotG->GetN(); // must use the same `n` for SetPoint and SetPointError
694 thisSlotG->SetPoint(n, *xsIt++, *ysIt++);
695 thisSlotG->SetPointError(n, *exlsIt++, *exhsIt++, *eylsIt++, *eyhsIt++);
696 }
697 }
698
699 // case: all types are non-container types, e.g. scalars
700 template <
701 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
702 std::enable_if_t<!IsDataContainer<X>::value && !IsDataContainer<Y>::value && !IsDataContainer<EXL>::value &&
703 !IsDataContainer<EXH>::value && !IsDataContainer<EYL>::value && !IsDataContainer<EYH>::value,
704 int> = 0>
705 void Exec(unsigned int slot, X x, Y y, EXL exl, EXH exh, EYL eyl, EYH eyh)
706 {
707 auto thisSlotG = fGraphAsymmErrors[slot];
708 const auto n = thisSlotG->GetN();
709 thisSlotG->SetPoint(n, x, y);
710 thisSlotG->SetPointError(n, exl, exh, eyl, eyh);
711 }
712
713 // case: types are combination of containers and non-containers
714 // this is not supported, error out
715 template <typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
716 typename... ExtraArgsToLowerPriority>
717 void Exec(unsigned int, X, Y, EXL, EXH, EYL, EYH, ExtraArgsToLowerPriority...)
718 {
719 throw std::runtime_error(
720 "GraphAsymmErrors was applied to a mix of scalar values and collections. This is not supported.");
721 }
722
723 void Finalize()
724 {
725 const auto nSlots = fGraphAsymmErrors.size();
726 auto resGraphAsymmErrors = fGraphAsymmErrors[0];
727 TList l;
728 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
729 for (unsigned int slot = 1; slot < nSlots; ++slot) {
730 l.Add(fGraphAsymmErrors[slot]);
731 }
732 resGraphAsymmErrors->Merge(&l);
733 }
734
735 // Helper functions for RMergeableValue
736 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
737 {
738 return std::make_unique<RMergeableFill<Result_t>>(*fGraphAsymmErrors[0]);
739 }
740
741 std::string GetActionName() { return "GraphAsymmErrors"; }
742
743 Result_t &PartialUpdate(unsigned int slot) { return *fGraphAsymmErrors[slot]; }
744
745 FillTGraphAsymmErrorsHelper MakeNew(void *newResult)
746 {
747 auto &result = *static_cast<std::shared_ptr<TGraphAsymmErrors> *>(newResult);
748 result->Set(0);
749 return FillTGraphAsymmErrorsHelper(result, fGraphAsymmErrors.size());
750 }
751};
752
753// In case of the take helper we have 4 cases:
754// 1. The column is not an RVec, the collection is not a vector
755// 2. The column is not an RVec, the collection is a vector
756// 3. The column is an RVec, the collection is not a vector
757// 4. The column is an RVec, the collection is a vector
758
/// Append `v` to the collection `c` via emplace_back.
/// `v` is a forwarding reference and is forwarded as such: lvalues are copied into the collection, rvalues are
/// moved into it (so move-only element types work as well).
template <typename V, typename COLL>
void FillColl(V&& v, COLL& c) {
   c.emplace_back(std::forward<V>(v));
}

// Use push_back for bool since some compilers do not support emplace_back.
template <typename COLL>
void FillColl(bool v, COLL& c) {
   c.push_back(v);
}
769
770// Case 1.: The column is not an RVec, the collection is not a vector
771// No optimisations, no transformations: just copies.
772template <typename RealT_t, typename T, typename COLL>
773class R__CLING_PTRCHECK(off) TakeHelper : public RActionImpl<TakeHelper<RealT_t, T, COLL>> {
774 Results<std::shared_ptr<COLL>> fColls;
775
776public:
777 using ColumnTypes_t = TypeList<T>;
778 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
779 {
780 fColls.emplace_back(resultColl);
781 for (unsigned int i = 1; i < nSlots; ++i)
782 fColls.emplace_back(std::make_shared<COLL>());
783 }
784 TakeHelper(TakeHelper &&);
785 TakeHelper(const TakeHelper &) = delete;
786
787 void InitTask(TTreeReader *, unsigned int) {}
788
789 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
790
791 void Initialize() { /* noop */}
792
793 void Finalize()
794 {
795 auto rColl = fColls[0];
796 for (unsigned int i = 1; i < fColls.size(); ++i) {
797 const auto &coll = fColls[i];
798 const auto end = coll->end();
799 // Use an explicit loop here to prevent compiler warnings introduced by
800 // clang's range-based loop analysis and vector<bool> references.
801 for (auto j = coll->begin(); j != end; j++) {
802 FillColl(*j, *rColl);
803 }
804 }
805 }
806
807 COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); }
808
809 std::string GetActionName() { return "Take"; }
810
811 TakeHelper MakeNew(void *newResult)
812 {
813 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
814 result->clear();
815 return TakeHelper(result, fColls.size());
816 }
817};
818
819// Case 2.: The column is not an RVec, the collection is a vector
820// Optimisations, no transformations: just copies.
821template <typename RealT_t, typename T>
822class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, T, std::vector<T>>
823 : public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
824 Results<std::shared_ptr<std::vector<T>>> fColls;
825
826public:
827 using ColumnTypes_t = TypeList<T>;
828 TakeHelper(const std::shared_ptr<std::vector<T>> &resultColl, const unsigned int nSlots)
829 {
830 fColls.emplace_back(resultColl);
831 for (unsigned int i = 1; i < nSlots; ++i) {
832 auto v = std::make_shared<std::vector<T>>();
833 v->reserve(1024);
834 fColls.emplace_back(v);
835 }
836 }
837 TakeHelper(TakeHelper &&);
838 TakeHelper(const TakeHelper &) = delete;
839
840 void InitTask(TTreeReader *, unsigned int) {}
841
842 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
843
844 void Initialize() { /* noop */}
845
846 // This is optimised to treat vectors
847 void Finalize()
848 {
849 ULong64_t totSize = 0;
850 for (auto &coll : fColls)
851 totSize += coll->size();
852 auto rColl = fColls[0];
853 rColl->reserve(totSize);
854 for (unsigned int i = 1; i < fColls.size(); ++i) {
855 auto &coll = fColls[i];
856 rColl->insert(rColl->end(), coll->begin(), coll->end());
857 }
858 }
859
860 std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; }
861
862 std::string GetActionName() { return "Take"; }
863
864 TakeHelper MakeNew(void *newResult)
865 {
866 auto &result = *static_cast<std::shared_ptr<std::vector<T>> *>(newResult);
867 result->clear();
868 return TakeHelper(result, fColls.size());
869 }
870};
871
872// Case 3.: The column is a RVec, the collection is not a vector
873// No optimisations, transformations from RVecs to vectors
874template <typename RealT_t, typename COLL>
875class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, RVec<RealT_t>, COLL>
876 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
877 Results<std::shared_ptr<COLL>> fColls;
878
879public:
880 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
881 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
882 {
883 fColls.emplace_back(resultColl);
884 for (unsigned int i = 1; i < nSlots; ++i)
885 fColls.emplace_back(std::make_shared<COLL>());
886 }
887 TakeHelper(TakeHelper &&);
888 TakeHelper(const TakeHelper &) = delete;
889
890 void InitTask(TTreeReader *, unsigned int) {}
891
892 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
893
894 void Initialize() { /* noop */}
895
896 void Finalize()
897 {
898 auto rColl = fColls[0];
899 for (unsigned int i = 1; i < fColls.size(); ++i) {
900 auto &coll = fColls[i];
901 for (auto &v : *coll) {
902 rColl->emplace_back(v);
903 }
904 }
905 }
906
907 std::string GetActionName() { return "Take"; }
908
909 TakeHelper MakeNew(void *newResult)
910 {
911 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
912 result->clear();
913 return TakeHelper(result, fColls.size());
914 }
915};
916
917// Case 4.: The column is an RVec, the collection is a vector
918// Optimisations, transformations from RVecs to vectors
919template <typename RealT_t>
920class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>
921 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
922
923 Results<std::shared_ptr<std::vector<std::vector<RealT_t>>>> fColls;
924
925public:
926 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
927 TakeHelper(const std::shared_ptr<std::vector<std::vector<RealT_t>>> &resultColl, const unsigned int nSlots)
928 {
929 fColls.emplace_back(resultColl);
930 for (unsigned int i = 1; i < nSlots; ++i) {
931 auto v = std::make_shared<std::vector<RealT_t>>();
932 v->reserve(1024);
933 fColls.emplace_back(v);
934 }
935 }
936 TakeHelper(TakeHelper &&);
937 TakeHelper(const TakeHelper &) = delete;
938
939 void InitTask(TTreeReader *, unsigned int) {}
940
941 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
942
943 void Initialize() { /* noop */}
944
945 // This is optimised to treat vectors
946 void Finalize()
947 {
948 ULong64_t totSize = 0;
949 for (auto &coll : fColls)
950 totSize += coll->size();
951 auto rColl = fColls[0];
952 rColl->reserve(totSize);
953 for (unsigned int i = 1; i < fColls.size(); ++i) {
954 auto &coll = fColls[i];
955 rColl->insert(rColl->end(), coll->begin(), coll->end());
956 }
957 }
958
959 std::string GetActionName() { return "Take"; }
960
961 TakeHelper MakeNew(void *newResult)
962 {
963 auto &result = *static_cast<typename decltype(fColls)::value_type *>(newResult);
964 result->clear();
965 return TakeHelper(result, fColls.size());
966 }
967};
968
969// Extern templates for TakeHelper
970// NOTE: The move-constructor of specializations declared as extern templates
971// must be defined out of line, otherwise cling fails to find its symbol.
972template <typename RealT_t, typename T, typename COLL>
973TakeHelper<RealT_t, T, COLL>::TakeHelper(TakeHelper<RealT_t, T, COLL> &&) = default;
974template <typename RealT_t, typename T>
975TakeHelper<RealT_t, T, std::vector<T>>::TakeHelper(TakeHelper<RealT_t, T, std::vector<T>> &&) = default;
976template <typename RealT_t, typename COLL>
977TakeHelper<RealT_t, RVec<RealT_t>, COLL>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, COLL> &&) = default;
978template <typename RealT_t>
979TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>> &&) = default;
980
981// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
982#if __GNUC__ > 5
983extern template class TakeHelper<bool, bool, std::vector<bool>>;
984extern template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
985extern template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
986extern template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
987extern template class TakeHelper<int, int, std::vector<int>>;
988extern template class TakeHelper<long, long, std::vector<long>>;
989extern template class TakeHelper<long long, long long, std::vector<long long>>;
990extern template class TakeHelper<float, float, std::vector<float>>;
991extern template class TakeHelper<double, double, std::vector<double>>;
992#endif
993
994template <typename ResultType>
995class R__CLING_PTRCHECK(off) MinHelper : public RActionImpl<MinHelper<ResultType>> {
996 std::shared_ptr<ResultType> fResultMin;
997 Results<ResultType> fMins;
998
999public:
1000 MinHelper(MinHelper &&) = default;
1001 MinHelper(const std::shared_ptr<ResultType> &minVPtr, const unsigned int nSlots)
1002 : fResultMin(minVPtr), fMins(nSlots, std::numeric_limits<ResultType>::max())
1003 {
1004 }
1005
1006 void Exec(unsigned int slot, ResultType v) { fMins[slot] = std::min(v, fMins[slot]); }
1007
1008 void InitTask(TTreeReader *, unsigned int) {}
1009
1010 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1011 void Exec(unsigned int slot, const T &vs)
1012 {
1013 for (auto &&v : vs)
1014 fMins[slot] = std::min(static_cast<ResultType>(v), fMins[slot]);
1015 }
1016
1017 void Initialize() { /* noop */}
1018
1019 void Finalize()
1020 {
1021 *fResultMin = std::numeric_limits<ResultType>::max();
1022 for (auto &m : fMins)
1023 *fResultMin = std::min(m, *fResultMin);
1024 }
1025
1026 // Helper functions for RMergeableValue
1027 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1028 {
1029 return std::make_unique<RMergeableMin<ResultType>>(*fResultMin);
1030 }
1031
1032 ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; }
1033
1034 std::string GetActionName() { return "Min"; }
1035
1036 MinHelper MakeNew(void *newResult)
1037 {
1038 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1039 return MinHelper(result, fMins.size());
1040 }
1041};
1042
1043template <typename ResultType>
1044class R__CLING_PTRCHECK(off) MaxHelper : public RActionImpl<MaxHelper<ResultType>> {
1045 std::shared_ptr<ResultType> fResultMax;
1046 Results<ResultType> fMaxs;
1047
1048public:
1049 MaxHelper(MaxHelper &&) = default;
1050 MaxHelper(const MaxHelper &) = delete;
1051 MaxHelper(const std::shared_ptr<ResultType> &maxVPtr, const unsigned int nSlots)
1052 : fResultMax(maxVPtr), fMaxs(nSlots, std::numeric_limits<ResultType>::lowest())
1053 {
1054 }
1055
1056 void InitTask(TTreeReader *, unsigned int) {}
1057 void Exec(unsigned int slot, ResultType v) { fMaxs[slot] = std::max(v, fMaxs[slot]); }
1058
1059 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1060 void Exec(unsigned int slot, const T &vs)
1061 {
1062 for (auto &&v : vs)
1063 fMaxs[slot] = std::max(static_cast<ResultType>(v), fMaxs[slot]);
1064 }
1065
1066 void Initialize() { /* noop */}
1067
1068 void Finalize()
1069 {
1070 *fResultMax = std::numeric_limits<ResultType>::lowest();
1071 for (auto &m : fMaxs) {
1072 *fResultMax = std::max(m, *fResultMax);
1073 }
1074 }
1075
1076 // Helper functions for RMergeableValue
1077 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1078 {
1079 return std::make_unique<RMergeableMax<ResultType>>(*fResultMax);
1080 }
1081
1082 ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; }
1083
1084 std::string GetActionName() { return "Max"; }
1085
1086 MaxHelper MakeNew(void *newResult)
1087 {
1088 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1089 return MaxHelper(result, fMaxs.size());
1090 }
1091};
1092
1093template <typename ResultType>
1094class R__CLING_PTRCHECK(off) SumHelper : public RActionImpl<SumHelper<ResultType>> {
1095 std::shared_ptr<ResultType> fResultSum;
1096 Results<ResultType> fSums;
1097 Results<ResultType> fCompensations;
1098
1099 /// Evaluate neutral element for this type and the sum operation.
1100 /// This is assumed to be any_value - any_value if operator- is defined
1101 /// for the type, otherwise a default-constructed ResultType{} is used.
1102 template <typename T = ResultType>
1103 auto NeutralElement(const T &v, int /*overloadresolver*/) -> decltype(v - v)
1104 {
1105 return v - v;
1106 }
1107
1108 template <typename T = ResultType, typename Dummy = int>
1109 ResultType NeutralElement(const T &, Dummy) // this overload has lower priority thanks to the template arg
1110 {
1111 return ResultType{};
1112 }
1113
1114public:
1115 SumHelper(SumHelper &&) = default;
1116 SumHelper(const SumHelper &) = delete;
1117 SumHelper(const std::shared_ptr<ResultType> &sumVPtr, const unsigned int nSlots)
1118 : fResultSum(sumVPtr), fSums(nSlots, NeutralElement(*sumVPtr, -1)),
1119 fCompensations(nSlots, NeutralElement(*sumVPtr, -1))
1120 {
1121 }
1122 void InitTask(TTreeReader *, unsigned int) {}
1123
1124 void Exec(unsigned int slot, ResultType x)
1125 {
1126 // Kahan Sum:
1127 ResultType y = x - fCompensations[slot];
1128 ResultType t = fSums[slot] + y;
1129 fCompensations[slot] = (t - fSums[slot]) - y;
1130 fSums[slot] = t;
1131 }
1132
1133 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1134 void Exec(unsigned int slot, const T &vs)
1135 {
1136 for (auto &&v : vs) {
1137 Exec(slot, v);
1138 }
1139 }
1140
1141 void Initialize() { /* noop */}
1142
1143 void Finalize()
1144 {
1145 ResultType sum(NeutralElement(ResultType{}, -1));
1146 ResultType compensation(NeutralElement(ResultType{}, -1));
1147 ResultType y(NeutralElement(ResultType{}, -1));
1148 ResultType t(NeutralElement(ResultType{}, -1));
1149 for (auto &m : fSums) {
1150 // Kahan Sum:
1151 y = m - compensation;
1152 t = sum + y;
1153 compensation = (t - sum) - y;
1154 sum = t;
1155 }
1156 *fResultSum += sum;
1157 }
1158
1159 // Helper functions for RMergeableValue
1160 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1161 {
1162 return std::make_unique<RMergeableSum<ResultType>>(*fResultSum);
1163 }
1164
1165 ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; }
1166
1167 std::string GetActionName() { return "Sum"; }
1168
1169 SumHelper MakeNew(void *newResult)
1170 {
1171 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1172 *result = NeutralElement(*result, -1);
1173 return SumHelper(result, fSums.size());
1174 }
1175};
1176
1177class R__CLING_PTRCHECK(off) MeanHelper : public RActionImpl<MeanHelper> {
1178 std::shared_ptr<double> fResultMean;
1179 std::vector<ULong64_t> fCounts;
1180 std::vector<double> fSums;
1181 std::vector<double> fPartialMeans;
1182 std::vector<double> fCompensations;
1183
1184public:
1185 MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1186 MeanHelper(MeanHelper &&) = default;
1187 MeanHelper(const MeanHelper &) = delete;
1188 void InitTask(TTreeReader *, unsigned int) {}
1189 void Exec(unsigned int slot, double v);
1190
1191 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1192 void Exec(unsigned int slot, const T &vs)
1193 {
1194 for (auto &&v : vs) {
1195
1196 fCounts[slot]++;
1197 // Kahan Sum:
1198 double y = v - fCompensations[slot];
1199 double t = fSums[slot] + y;
1200 fCompensations[slot] = (t - fSums[slot]) - y;
1201 fSums[slot] = t;
1202 }
1203 }
1204
1205 void Initialize() { /* noop */}
1206
1207 void Finalize();
1208
1209 // Helper functions for RMergeableValue
1210 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1211 {
1212 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1213 return std::make_unique<RMergeableMean>(*fResultMean, counts);
1214 }
1215
1216 double &PartialUpdate(unsigned int slot);
1217
1218 std::string GetActionName() { return "Mean"; }
1219
1220 MeanHelper MakeNew(void *newResult)
1221 {
1222 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1223 return MeanHelper(result, fSums.size());
1224 }
1225};
1226
1227class R__CLING_PTRCHECK(off) StdDevHelper : public RActionImpl<StdDevHelper> {
1228 // Number of subsets of data
1229 unsigned int fNSlots;
1230 std::shared_ptr<double> fResultStdDev;
1231 // Number of element for each slot
1232 std::vector<ULong64_t> fCounts;
1233 // Mean of each slot
1234 std::vector<double> fMeans;
1235 // Squared distance from the mean
1236 std::vector<double> fDistancesfromMean;
1237
1238public:
1239 StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1240 StdDevHelper(StdDevHelper &&) = default;
1241 StdDevHelper(const StdDevHelper &) = delete;
1242 void InitTask(TTreeReader *, unsigned int) {}
1243 void Exec(unsigned int slot, double v);
1244
1245 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1246 void Exec(unsigned int slot, const T &vs)
1247 {
1248 for (auto &&v : vs) {
1249 Exec(slot, v);
1250 }
1251 }
1252
1253 void Initialize() { /* noop */}
1254
1255 void Finalize();
1256
1257 // Helper functions for RMergeableValue
1258 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1259 {
1260 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1261 const Double_t mean =
1262 std::inner_product(fMeans.begin(), fMeans.end(), fCounts.begin(), 0.) / static_cast<Double_t>(counts);
1263 return std::make_unique<RMergeableStdDev>(*fResultStdDev, counts, mean);
1264 }
1265
1266 std::string GetActionName() { return "StdDev"; }
1267
1268 StdDevHelper MakeNew(void *newResult)
1269 {
1270 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1271 return StdDevHelper(result, fCounts.size());
1272 }
1273};
1274
1275template <typename PrevNodeType>
1276class R__CLING_PTRCHECK(off) DisplayHelper : public RActionImpl<DisplayHelper<PrevNodeType>> {
1277private:
1279 std::shared_ptr<Display_t> fDisplayerHelper;
1280 std::shared_ptr<PrevNodeType> fPrevNode;
1281 size_t fEntriesToProcess;
1282
1283public:
1284 DisplayHelper(size_t nRows, const std::shared_ptr<Display_t> &d, const std::shared_ptr<PrevNodeType> &prevNode)
1285 : fDisplayerHelper(d), fPrevNode(prevNode), fEntriesToProcess(nRows)
1286 {
1287 }
1288 DisplayHelper(DisplayHelper &&) = default;
1289 DisplayHelper(const DisplayHelper &) = delete;
1290 void InitTask(TTreeReader *, unsigned int) {}
1291
1292 template <typename... Columns>
1293 void Exec(unsigned int, Columns &... columns)
1294 {
1295 if (fEntriesToProcess == 0)
1296 return;
1297
1298 fDisplayerHelper->AddRow(columns...);
1299 --fEntriesToProcess;
1300
1301 if (fEntriesToProcess == 0) {
1302 // No more entries to process. Send a one-time signal that this node
1303 // of the graph is done. It is important that the 'StopProcessing'
1304 // method is only called once from this helper, otherwise it would seem
1305 // like more than one operation has completed its work.
1306 fPrevNode->StopProcessing();
1307 }
1308 }
1309
1310 void Initialize() {}
1311
1312 void Finalize() {}
1313
1314 std::string GetActionName() { return "Display"; }
1315};
1316
1317template <typename T>
1318void *GetData(ROOT::VecOps::RVec<T> &v)
1319{
1320 return v.data();
1321}
1322
/// Generic overload of GetData: the argument is ignored and a null pointer is always returned.
template <typename T>
void *GetData(T & /*v*/)
{
   void *noData = nullptr;
   return noData;
}
1328
1329template <typename T>
1330void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &name,
1331 TBranch *&branch, void *&branchAddress, T *address, RBranchSet &outputBranches,
1332 bool /*isDefine*/)
1333{
1334 static TClassRef TBOClRef("TBranchObject");
1335
1336 TBranch *inputBranch = nullptr;
1337 if (inputTree) {
1338 inputBranch = inputTree->GetBranch(inName.c_str());
1339 if (!inputBranch) // try harder
1340 inputBranch = inputTree->FindBranch(inName.c_str());
1341 }
1342
1343 auto *outputBranch = outputBranches.Get(name);
1344 if (outputBranch) {
1345 // the output branch was already created, we just need to (re)set its address
1346 if (inputBranch && inputBranch->IsA() == TBOClRef) {
1347 outputBranch->SetAddress(reinterpret_cast<T **>(inputBranch->GetAddress()));
1348 } else if (outputBranch->IsA() != TBranch::Class()) {
1349 branchAddress = address;
1350 outputBranch->SetAddress(&branchAddress);
1351 } else {
1352 outputBranch->SetAddress(address);
1353 branchAddress = address;
1354 }
1355 return;
1356 }
1357
1358 if (inputBranch) {
1359 // Respect the original bufsize and splitlevel arguments
1360 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
1361 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
1362 // TObject branch, see https://bit.ly/2EjLMId ).
1363 const auto bufSize = inputBranch->GetBasketSize();
1364 const auto splitLevel = inputBranch->GetSplitLevel();
1365
1366 if (inputBranch->IsA() == TBOClRef) {
1367 // Need to pass a pointer to pointer
1368 outputBranch =
1369 outputTree.Branch(name.c_str(), reinterpret_cast<T **>(inputBranch->GetAddress()), bufSize, splitLevel);
1370 } else {
1371 outputBranch = outputTree.Branch(name.c_str(), address, bufSize, splitLevel);
1372 }
1373 } else {
1374 outputBranch = outputTree.Branch(name.c_str(), address);
1375 }
1376 outputBranches.Insert(name, outputBranch);
1377 // This is not an array branch, so we don't register the address of the output branch here
1378 branch = nullptr;
1379 branchAddress = nullptr;
1380}
1381
1382/// Helper function for SnapshotHelper and SnapshotHelperMT. It creates new branches for the output TTree of a Snapshot.
1383/// This overload is called for columns of type `RVec<T>`. For RDF, these can represent:
1384/// 1. c-style arrays in ROOT files, so we are sure that there are input trees to which we can ask the correct branch
1385/// title
1386/// 2. RVecs coming from a custom column or the input file/data-source
1387/// 3. vectors coming from ROOT files that are being read as RVecs
1388/// 4. TClonesArray
1389///
1390/// In case of 1., we keep aside the pointer to the branch and the pointer to the input value (in `branch` and
1391/// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch.
1392template <typename T>
1393void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &outName,
1394 TBranch *&branch, void *&branchAddress, RVec<T> *ab, RBranchSet &outputBranches, bool isDefine)
1395{
1396 TBranch *inputBranch = nullptr;
1397 if (inputTree) {
1398 inputBranch = inputTree->GetBranch(inName.c_str());
1399 if (!inputBranch) // try harder
1400 inputBranch = inputTree->FindBranch(inName.c_str());
1401 }
1402 auto *outputBranch = outputBranches.Get(outName);
1403
1404 // if no backing input branch, we must write out an RVec
1405 bool mustWriteRVec = (inputBranch == nullptr || isDefine);
1406 // otherwise, if input branch is TClonesArray, must write out an RVec
1407 if (!mustWriteRVec && std::string_view(inputBranch->GetClassName()) == "TClonesArray") {
1408 mustWriteRVec = true;
1409 Warning("Snapshot",
1410 "Branch \"%s\" contains TClonesArrays but the type specified to Snapshot was RVec<T>. The branch will "
1411 "be written out as a RVec instead of a TClonesArray. Specify that the type of the branch is "
1412 "TClonesArray as a Snapshot template parameter to write out a TClonesArray instead.",
1413 inName.c_str());
1414 }
1415 // otherwise, if input branch is a std::vector or RVec, must write out an RVec
1416 if (!mustWriteRVec) {
1417 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
1418 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
1419 mustWriteRVec = true;
1420 }
1421
1422 if (mustWriteRVec) {
1423 // Treat:
1424 // 2. RVec coming from a custom column or a source
1425 // 3. RVec coming from a column on disk of type vector (the RVec is adopting the data of that vector)
1426 // 4. TClonesArray written out as RVec<T>
1427 if (outputBranch) {
1428 // needs to be SetObject (not SetAddress) to mimic what happens when this TBranchElement is constructed
1429 outputBranch->SetObject(ab);
1430 } else {
1431 auto *b = outputTree.Branch(outName.c_str(), ab);
1432 outputBranches.Insert(outName, b);
1433 }
1434 return;
1435 }
1436
1437 // else this must be a C-array, aka case 1.
1438 auto dataPtr = ab->data();
1439
1440 if (outputBranch) {
1441 if (outputBranch->IsA() != TBranch::Class()) {
1442 branchAddress = dataPtr;
1443 outputBranch->SetAddress(&branchAddress);
1444 } else {
1445 outputBranch->SetAddress(dataPtr);
1446 }
1447 } else {
1448 // must construct the leaflist for the output branch and create the branch in the output tree
1449 auto *const leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
1450 const auto bname = leaf->GetName();
1451 auto *sizeLeaf = leaf->GetLeafCount();
1452 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
1453
1454 if (sizeLeaf && !outputBranches.Get(sizeLeafName)) {
1455 // The output array branch `bname` has dynamic size stored in leaf `sizeLeafName`, but that leaf has not been
1456 // added to the output tree yet. However, the size leaf has to be available for the creation of the array
1457 // branch to be successful. So we create the size leaf here.
1458 const auto sizeTypeStr = TypeName2ROOTTypeName(sizeLeaf->GetTypeName());
1459 const auto sizeBufSize = sizeLeaf->GetBranch()->GetBasketSize();
1460 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for `sizeLeafName`
1461 auto *sizeBranch = outputTree.Branch(sizeLeafName.c_str(), (void *)nullptr,
1462 (sizeLeafName + '/' + sizeTypeStr).c_str(), sizeBufSize);
1463 outputBranches.Insert(sizeLeafName, sizeBranch);
1464 }
1465
1466 const auto btype = leaf->GetTypeName();
1467 const auto rootbtype = TypeName2ROOTTypeName(btype);
1468 if (rootbtype == ' ') {
1469 Warning("Snapshot",
1470 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. This "
1471 "column will not be written out.",
1472 bname);
1473 } else {
1474 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
1475 outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str());
1476 outputBranch->SetTitle(inputBranch->GetTitle());
1477 outputBranches.Insert(outName, outputBranch);
1478 branch = outputBranch;
1479 branchAddress = ab->data();
1480 }
1481 }
1482}
1483
1484void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName);
1485
1486/// Helper object for a single-thread Snapshot action
1487template <typename... ColTypes>
1488class R__CLING_PTRCHECK(off) SnapshotHelper : public RActionImpl<SnapshotHelper<ColTypes...>> {
1489 std::string fFileName;
1490 std::string fDirName;
1491 std::string fTreeName;
1492 RSnapshotOptions fOptions;
1493 std::unique_ptr<TFile> fOutputFile;
1494 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
1495 bool fBranchAddressesNeedReset{true};
1496 ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1497 ColumnNames_t fOutputBranchNames;
1498 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
1499 // TODO we might be able to unify fBranches, fBranchAddresses and fOutputBranches
1500 std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
1501 std::vector<void *> fBranchAddresses; // Addresses of objects associated to output branches
1502 RBranchSet fOutputBranches;
1503 std::vector<bool> fIsDefine;
1504
1505public:
1506 using ColumnTypes_t = TypeList<ColTypes...>;
1507 SnapshotHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
1508 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
1509 std::vector<bool> &&isDefine)
1510 : fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1511 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fBranches(vbnames.size(), nullptr),
1512 fBranchAddresses(vbnames.size(), nullptr), fIsDefine(std::move(isDefine))
1513 {
1514 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1515 }
1516
1517 SnapshotHelper(const SnapshotHelper &) = delete;
1518 SnapshotHelper(SnapshotHelper &&) = default;
1519 ~SnapshotHelper()
1520 {
1521 if (!fTreeName.empty() /*not moved from*/ && !fOutputFile /* did not run */ && fOptions.fLazy)
1522 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1523 }
1524
1525 void InitTask(TTreeReader *r, unsigned int /* slot */)
1526 {
1527 if (r)
1528 fInputTree = r->GetTree();
1529 fBranchAddressesNeedReset = true;
1530 }
1531
1532 void Exec(unsigned int /* slot */, ColTypes &... values)
1533 {
1534 using ind_t = std::index_sequence_for<ColTypes...>;
1535 if (!fBranchAddressesNeedReset) {
1536 UpdateCArraysPtrs(values..., ind_t{});
1537 } else {
1538 SetBranches(values..., ind_t{});
1539 fBranchAddressesNeedReset = false;
1540 }
1541 fOutputTree->Fill();
1542 }
1543
1544 template <std::size_t... S>
1545 void UpdateCArraysPtrs(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1546 {
1547 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1548 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1549 // leaving associated to the branch of the output tree an invalid pointer.
1550 // With this code, we set the value of the pointer in the output branch anew when needed.
1551 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1552 // we need an int for the expander list.
1553 int expander[] = {(fBranches[S] && fBranchAddresses[S] != GetData(values)
1554 ? fBranches[S]->SetAddress(GetData(values)),
1555 fBranchAddresses[S] = GetData(values), 0 : 0, 0)...,
1556 0};
1557 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1558 }
1559
1560 template <std::size_t... S>
1561 void SetBranches(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1562 {
1563 // create branches in output tree
1564 int expander[] = {(SetBranchesHelper(fInputTree, *fOutputTree, fInputBranchNames[S], fOutputBranchNames[S],
1565 fBranches[S], fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S]),
1566 0)...,
1567 0};
1568 fOutputBranches.AssertNoNullBranchAddresses();
1569 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1570 }
1571
1572 void Initialize()
1573 {
1574 fOutputFile.reset(
1575 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
1577 if(!fOutputFile)
1578 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1579
1580 TDirectory *outputDir = fOutputFile.get();
1581 if (!fDirName.empty()) {
1582 TString checkupdate = fOptions.fMode;
1583 checkupdate.ToLower();
1584 if (checkupdate == "update")
1585 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
1586 else
1587 outputDir = fOutputFile->mkdir(fDirName.c_str());
1588 }
1589
1590 fOutputTree =
1591 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
1592
1593 if (fOptions.fAutoFlush)
1594 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
1595 }
1596
1597 void Finalize()
1598 {
1599 assert(fOutputTree != nullptr);
1600 assert(fOutputFile != nullptr);
1601
1602 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
1603 fOutputTree->AutoSave("flushbaskets");
1604 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
1605 fOutputTree.reset();
1606 fOutputFile->Close();
1607 }
1608
1609 std::string GetActionName() { return "Snapshot"; }
1610
1611 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1612 {
1613 return [this](unsigned int, const RSampleInfo &) mutable { fBranchAddressesNeedReset = true; };
1614 }
1615
1616 /**
1617 * @brief Create a new SnapshotHelper with a different output file name
1618 *
1619 * @param newName A type-erased string with the output file name
1620 * @return SnapshotHelper
1621 *
1622 * This MakeNew implementation is tied to the cloning feature of actions
1623 * of the computation graph. In particular, cloning a Snapshot node usually
1624 * also involves changing the name of the output file, otherwise the cloned
1625 * Snapshot would overwrite the same file.
1626 */
1627 SnapshotHelper MakeNew(void *newName)
1628 {
1629 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
1630 return SnapshotHelper{
1631 finalName, fDirName, fTreeName, fInputBranchNames, fOutputBranchNames, fOptions, std::vector<bool>(fIsDefine)};
1632 }
1633};
1634
1635/// Helper object for a multi-thread Snapshot action
1636template <typename... ColTypes>
1637class R__CLING_PTRCHECK(off) SnapshotHelperMT : public RActionImpl<SnapshotHelperMT<ColTypes...>> {
1638 unsigned int fNSlots;
1639 std::unique_ptr<ROOT::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
1640 std::vector<std::shared_ptr<ROOT::TBufferMergerFile>> fOutputFiles;
1641 std::vector<std::unique_ptr<TTree>> fOutputTrees;
1642 std::vector<int> fBranchAddressesNeedReset; // vector<bool> does not allow concurrent writing of different elements
1643 std::string fFileName; // name of the output file name
1644 std::string fDirName; // name of TFile subdirectory in which output must be written (possibly empty)
1645 std::string fTreeName; // name of output tree
1646 RSnapshotOptions fOptions; // struct holding options to pass down to TFile and TTree in this action
1647 ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1648 ColumnNames_t fOutputBranchNames;
1649 std::vector<TTree *> fInputTrees; // Current input trees. Set at initialization time (`InitTask`)
1650 // Addresses of branches in output per slot, non-null only for the ones holding C arrays
1651 std::vector<std::vector<TBranch *>> fBranches;
1652 // Addresses associated to output branches per slot, non-null only for the ones holding C arrays
1653 std::vector<std::vector<void *>> fBranchAddresses;
1654 std::vector<RBranchSet> fOutputBranches;
1655 std::vector<bool> fIsDefine;
1656
1657public:
1658 using ColumnTypes_t = TypeList<ColTypes...>;
1659 SnapshotHelperMT(const unsigned int nSlots, std::string_view filename, std::string_view dirname,
1660 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
1661 const RSnapshotOptions &options, std::vector<bool> &&isDefine)
1662 : fNSlots(nSlots), fOutputFiles(fNSlots), fOutputTrees(fNSlots), fBranchAddressesNeedReset(fNSlots, 1),
1663 fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1664 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fInputTrees(fNSlots),
1665 fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
1666 fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr)), fOutputBranches(fNSlots),
1667 fIsDefine(std::move(isDefine))
1668 {
1669 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1670 }
1671 SnapshotHelperMT(const SnapshotHelperMT &) = delete;
1672 SnapshotHelperMT(SnapshotHelperMT &&) = default;
1673 ~SnapshotHelperMT()
1674 {
1675 if (!fTreeName.empty() /*not moved from*/ && fOptions.fLazy && !fOutputFiles.empty() &&
1676 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &f) { return !f; }) /* never run */)
1677 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1678 }
1679
1680 void InitTask(TTreeReader *r, unsigned int slot)
1681 {
1682 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
1683 if (!fOutputFiles[slot]) {
1684 // first time this thread executes something, let's create a TBufferMerger output directory
1685 fOutputFiles[slot] = fMerger->GetFile();
1686 }
1687 TDirectory *treeDirectory = fOutputFiles[slot].get();
1688 if (!fDirName.empty()) {
1689 // call returnExistingDirectory=true since MT can end up making this call multiple times
1690 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
1691 }
1692 // re-create output tree as we need to create its branches again, with new input variables
1693 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
1694 fOutputTrees[slot] =
1695 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1696 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
1697 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
1698 fOutputTrees[slot]->SetImplicitMT(false);
1699 if (fOptions.fAutoFlush)
1700 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
1701 if (r) {
1702 // not an empty-source RDF
1703 fInputTrees[slot] = r->GetTree();
1704 }
1705 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
1706 }
1707
1708 void FinalizeTask(unsigned int slot)
1709 {
1710 if (fOutputTrees[slot]->GetEntries() > 0)
1711 fOutputFiles[slot]->Write();
1712 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
1713 fOutputTrees[slot].reset(nullptr);
1714 fOutputBranches[slot].Clear();
1715 }
1716
1717 void Exec(unsigned int slot, ColTypes &... values)
1718 {
1719 using ind_t = std::index_sequence_for<ColTypes...>;
1720 if (fBranchAddressesNeedReset[slot] == 0) {
1721 UpdateCArraysPtrs(slot, values..., ind_t{});
1722 } else {
1723 SetBranches(slot, values..., ind_t{});
1724 fBranchAddressesNeedReset[slot] = 0;
1725 }
1726 fOutputTrees[slot]->Fill();
1727 auto entries = fOutputTrees[slot]->GetEntries();
1728 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
1729 if ((autoFlush > 0) && (entries % autoFlush == 0))
1730 fOutputFiles[slot]->Write();
1731 }
1732
1733 template <std::size_t... S>
1734 void UpdateCArraysPtrs(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1735 {
1736 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1737 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1738 // leaving associated to the branch of the output tree an invalid pointer.
1739 // With this code, we set the value of the pointer in the output branch anew when needed.
1740 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1741 // we need an int for the expander list.
1742 int expander[] = {(fBranches[slot][S] && fBranchAddresses[slot][S] != GetData(values)
1743 ? fBranches[slot][S]->SetAddress(GetData(values)),
1744 fBranchAddresses[slot][S] = GetData(values), 0 : 0, 0)...,
1745 0};
1746 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1747 (void)slot; // Also "slot" might be unused, in case "values" is empty
1748 }
1749
1750 template <std::size_t... S>
1751 void SetBranches(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1752 {
1753 // hack to call TTree::Branch on all variadic template arguments
1754 int expander[] = {(SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
1755 fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S],
1756 &values, fOutputBranches[slot], fIsDefine[S]),
1757 0)...,
1758 0};
1759 fOutputBranches[slot].AssertNoNullBranchAddresses();
1760 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1761 }
1762
1763 void Initialize()
1764 {
1765 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
1766 auto out_file = TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/fFileName.c_str(), cs);
1767 if(!out_file)
1768 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1769 fMerger = std::make_unique<ROOT::TBufferMerger>(std::unique_ptr<TFile>(out_file));
1770 }
1771
1772 void Finalize()
1773 {
1774 assert(std::any_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &ptr) { return ptr != nullptr; }));
1775
1776 auto fileWritten = false;
1777 for (auto &file : fOutputFiles) {
1778 if (file) {
1779 file->Write();
1780 file->Close();
1781 fileWritten = true;
1782 }
1783 }
1784
1785 if (!fileWritten) {
1786 Warning("Snapshot",
1787 "No input entries (input TTree was empty or no entry passed the Filters). Output TTree is empty.");
1788 }
1789
1790 // flush all buffers to disk by destroying the TBufferMerger
1791 fOutputFiles.clear();
1792 fMerger.reset();
1793 }
1794
/// Name under which this action is reported.
std::string GetActionName()
{
   return std::string("Snapshot");
}
1796
1797 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1798 {
1799 return [this](unsigned int slot, const RSampleInfo &) mutable { fBranchAddressesNeedReset[slot] = 1; };
1800 }
1801
1802 /**
1803 * @brief Create a new SnapshotHelperMT with a different output file name
1804 *
1805 * @param newName A type-erased string with the output file name
1806 * @return SnapshotHelperMT
1807 *
1808 * This MakeNew implementation is tied to the cloning feature of actions
1809 * of the computation graph. In particular, cloning a Snapshot node usually
1810 * also involves changing the name of the output file, otherwise the cloned
1811 * Snapshot would overwrite the same file.
1812 */
1813 SnapshotHelperMT MakeNew(void *newName)
1814 {
1815 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
1816 return SnapshotHelperMT{fNSlots, finalName, fDirName, fTreeName,
1817 fInputBranchNames, fOutputBranchNames, fOptions, std::vector<bool>(fIsDefine)};
1818 }
1819};
1820
1821template <typename Acc, typename Merge, typename R, typename T, typename U,
1822 bool MustCopyAssign = std::is_same<R, U>::value>
1823class R__CLING_PTRCHECK(off) AggregateHelper
1824 : public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1825 Acc fAggregate;
1826 Merge fMerge;
1827 std::shared_ptr<U> fResult;
1828 Results<U> fAggregators;
1829
1830public:
1831 using ColumnTypes_t = TypeList<T>;
1832
1833 AggregateHelper(Acc &&f, Merge &&m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1834 : fAggregate(std::move(f)), fMerge(std::move(m)), fResult(result), fAggregators(nSlots, *result)
1835 {
1836 }
1837
1838 AggregateHelper(Acc &f, Merge &m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1839 : fAggregate(f), fMerge(m), fResult(result), fAggregators(nSlots, *result)
1840 {
1841 }
1842
1843 AggregateHelper(AggregateHelper &&) = default;
1844 AggregateHelper(const AggregateHelper &) = delete;
1845
1846 void InitTask(TTreeReader *, unsigned int) {}
1847
1848 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<MustCopyAssign_, int> = 0>
1849 void Exec(unsigned int slot, const T &value)
1850 {
1851 fAggregators[slot] = fAggregate(fAggregators[slot], value);
1852 }
1853
1854 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<!MustCopyAssign_, int> = 0>
1855 void Exec(unsigned int slot, const T &value)
1856 {
1857 fAggregate(fAggregators[slot], value);
1858 }
1859
1860 void Initialize() { /* noop */}
1861
1862 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1863 bool MergeAll = std::is_same<void, MergeRet>::value>
1864 std::enable_if_t<MergeAll, void> Finalize()
1865 {
1866 fMerge(fAggregators);
1867 *fResult = fAggregators[0];
1868 }
1869
1870 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1871 bool MergeTwoByTwo = std::is_same<U, MergeRet>::value>
1872 std::enable_if_t<MergeTwoByTwo, void> Finalize(...) // ... needed to let compiler distinguish overloads
1873 {
1874 for (const auto &acc : fAggregators)
1875 *fResult = fMerge(*fResult, acc);
1876 }
1877
1878 U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; }
1879
1880 std::string GetActionName() { return "Aggregate"; }
1881
1882 AggregateHelper MakeNew(void *newResult)
1883 {
1884 auto &result = *static_cast<std::shared_ptr<U> *>(newResult);
1885 return AggregateHelper(fAggregate, fMerge, result, fAggregators.size());
1886 }
1887};
1888
1889} // end of NS RDF
1890} // end of NS Internal
1891} // end of NS ROOT
1892
1893/// \endcond
1894
1895#endif
PyObject * fCallable
Handle_t Display_t
Display handle.
Definition GuiTypes.h:27
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define g(i)
Definition RSha256.hxx:105
#define h(i)
Definition RSha256.hxx:106
#define R(a, b, c, d, e, f, g, h, i)
Definition RSha256.hxx:110
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
double Double_t
Definition RtypesCore.h:59
unsigned long long ULong64_t
Definition RtypesCore.h:70
#define X(type, name)
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:229
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char x1
char name[80]
Definition TGX11.cxx:110
TClass * IsA() const override
Definition TStringLong.h:20
TTime operator*(const TTime &t1, const TTime &t2)
Definition TTime.h:85
Base class for action helpers, see RInterface::Book() for more information.
pointer data() noexcept
Return a pointer to the vector's buffer, even if empty().
Definition RVec.hxx:280
This class is the textual representation of the content of a columnar dataset.
Definition RDisplay.hxx:65
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1529
A TTree is a list of TBranches.
Definition TBranch.h:93
virtual const char * GetClassName() const
Return the name of the user class whose content is stored in this branch, if any.
Definition TBranch.cxx:1324
virtual char * GetAddress() const
Definition TBranch.h:212
static TClass * Class()
Int_t GetSplitLevel() const
Definition TBranch.h:250
TClass * IsA() const override
Definition TBranch.h:295
virtual Int_t GetBasketSize() const
Definition TBranch.h:217
TObjArray * GetListOfLeaves()
Definition TBranch.h:247
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:28
Collection abstract base class.
Definition TCollection.h:65
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4089
TGraph with asymmetric error bars.
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:670
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:59
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
A doubly linked list.
Definition TList.h:38
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition TNamed.cxx:164
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
const char * GetTitle() const override
Returns title of object.
Definition TNamed.h:48
TObject * UncheckedAt(Int_t i) const
Definition TObjArray.h:84
Statistical variable, defined by its mean and variance (RMS).
Definition TStatistic.h:33
Basic string class.
Definition TString.h:139
void ToLower()
Change string to lower-case.
Definition TString.cxx:1182
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:79
virtual TBranch * FindBranch(const char *name)
Return the branch that correspond to the path 'branchname', which can include the name of the tree or...
Definition TTree.cxx:4841
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
Definition TTree.cxx:5294
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition TTree.h:353
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition TTree.h:261
RooCmdArg Columns(Int_t ncol)
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
#define F(x, y, z)
#define H(x, y, z)
CPYCPPYY_EXTERN bool Exec(const std::string &cmd)
Definition API.cxx:455
std::unique_ptr< RMergeableVariations< T > > GetMergeableValue(ROOT::RDF::Experimental::RResultMap< T > &rmap)
Retrieve mergeable values after calling ROOT::RDF::VariationsFor .
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition RDFUtils.cxx:314
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition RDFUtils.cxx:259
constexpr std::size_t FindIdxTrue(const T &arr)
Definition Utils.hxx:231
void(off) SmallVectorTemplateBase< T
double T(double x)
std::vector< std::string > ColumnNames_t
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with an RDataFrame computation graph via e....
ROOT type_traits extensions.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
RooArgSet S(Args_t &&... args)
Definition RooArgSet.h:195
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
__device__ AFloat max(AFloat x, AFloat y)
Definition Kernels.cuh:207
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition tmvaglob.cxx:176
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
bool fLazy
Do not start the event loop when Snapshot is called.
int fCompressionLevel
Compression level of output file.
Lightweight storage for a collection of types.
TMarker m
Definition textangle.C:8
TLine l
Definition textangle.C:4
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345