Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ActionHelpers.hxx
Go to the documentation of this file.
1/**
2 \file ROOT/RDF/ActionHelpers.hxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \date 2020-06
9*/
10
11/*************************************************************************
12 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
13 * All rights reserved. *
14 * *
15 * For the licensing terms see $ROOTSYS/LICENSE. *
16 * For the list of contributors see $ROOTSYS/README/CREDITS. *
17 *************************************************************************/
18
19#ifndef ROOT_RDFOPERATIONS
20#define ROOT_RDFOPERATIONS
21
22#include "Compression.h"
23#include <string_view>
24#include "ROOT/RVec.hxx"
25#include "ROOT/TBufferMerger.hxx" // for SnapshotHelper
28#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/TypeTraits.hxx"
31#include "ROOT/RDF/RDisplay.hxx"
32#include "RtypesCore.h"
33#include "TBranch.h"
34#include "TClassEdit.h"
35#include "TClassRef.h"
36#include "TDirectory.h"
37#include "TError.h" // for R__ASSERT, Warning
38#include "TFile.h" // for SnapshotHelper
39#include "TH1.h"
40#include "TGraph.h"
41#include "TGraphAsymmErrors.h"
42#include "TLeaf.h"
43#include "TObject.h"
44#include "TTree.h"
45#include "TTreeReader.h" // for SnapshotHelper
46#include "TStatistic.h"
49
50#include <algorithm>
51#include <functional>
52#include <limits>
53#include <memory>
54#include <stdexcept>
55#include <string>
56#include <type_traits>
57#include <utility> // std::index_sequence
58#include <vector>
59#include <iomanip>
60#include <numeric> // std::accumulate in MeanHelper
61
62/// \cond HIDDEN_SYMBOLS
63
64namespace ROOT {
65namespace Internal {
66namespace RDF {
67using namespace ROOT::TypeTraits;
68using namespace ROOT::VecOps;
69using namespace ROOT::RDF;
70using namespace ROOT::Detail::RDF;
71
72using Hist_t = ::TH1D;
73
74class RBranchSet {
75 std::vector<TBranch *> fBranches;
76 std::vector<std::string> fNames;
77
78public:
79 TBranch *Get(const std::string &name) const
80 {
81 auto it = std::find(fNames.begin(), fNames.end(), name);
82 if (it == fNames.end())
83 return nullptr;
84 return fBranches[std::distance(fNames.begin(), it)];
85 }
86
87 void Insert(const std::string &name, TBranch *address)
88 {
89 if (address == nullptr) {
90 throw std::logic_error("Trying to insert a null branch address.");
91 }
92 if (std::find(fBranches.begin(), fBranches.end(), address) != fBranches.end()) {
93 throw std::logic_error("Trying to insert a branch address that's already present.");
94 }
95 if (std::find(fNames.begin(), fNames.end(), name) != fNames.end()) {
96 throw std::logic_error("Trying to insert a branch name that's already present.");
97 }
98 fNames.emplace_back(name);
99 fBranches.emplace_back(address);
100 }
101
102 void Clear()
103 {
104 fBranches.clear();
105 fNames.clear();
106 }
107
108 void AssertNoNullBranchAddresses()
109 {
110 std::vector<TBranch *> branchesWithNullAddress;
111 std::copy_if(fBranches.begin(), fBranches.end(), std::back_inserter(branchesWithNullAddress),
112 [](TBranch *b) { return b->GetAddress() == nullptr; });
113
114 if (branchesWithNullAddress.empty())
115 return;
116
117 // otherwise build error message and throw
118 std::vector<std::string> missingBranchNames;
119 std::transform(branchesWithNullAddress.begin(), branchesWithNullAddress.end(),
120 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
121 std::string msg = "RDataFrame::Snapshot:";
122 if (missingBranchNames.size() == 1) {
123 msg += " branch " + missingBranchNames[0] +
124 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
125 "it is";
126 } else {
127 msg += " branches ";
128 for (const auto &bName : missingBranchNames)
129 msg += bName + ", ";
130 msg.resize(msg.size() - 2); // remove last ", "
131 msg +=
132 " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
133 }
134 msg += " not part of the set of branches that are being written out.";
135 throw std::runtime_error(msg);
136 }
137};
138
/// Container type used by action helpers to hold one partial result per processing slot.
// std::vector<bool> must never be instantiated here: it cannot hand out a reference to one of the thread-local
// results, so bool is stored in a std::deque instead. Routing every helper through this single alias also makes
// it easy to swap the underlying container later, e.g. if false sharing of the thread-local results shows up.
template <typename T>
using Results = typename std::conditional<std::is_same<T, bool>::value, std::deque<T>, std::vector<T>>::type;
145
146template <typename F>
147class R__CLING_PTRCHECK(off) ForeachSlotHelper : public RActionImpl<ForeachSlotHelper<F>> {
148 F fCallable;
149
150public:
152 ForeachSlotHelper(F &&f) : fCallable(f) {}
153 ForeachSlotHelper(ForeachSlotHelper &&) = default;
154 ForeachSlotHelper(const ForeachSlotHelper &) = delete;
155
156 void InitTask(TTreeReader *, unsigned int) {}
157
158 template <typename... Args>
159 void Exec(unsigned int slot, Args &&... args)
160 {
161 // check that the decayed types of Args are the same as the branch types
162 static_assert(std::is_same<TypeList<std::decay_t<Args>...>, ColumnTypes_t>::value, "");
163 fCallable(slot, std::forward<Args>(args)...);
164 }
165
166 void Initialize() { /* noop */}
167
168 void Finalize() { /* noop */}
169
170 std::string GetActionName() { return "ForeachSlot"; }
171};
172
173class R__CLING_PTRCHECK(off) CountHelper : public RActionImpl<CountHelper> {
174 std::shared_ptr<ULong64_t> fResultCount;
175 Results<ULong64_t> fCounts;
176
177public:
178 using ColumnTypes_t = TypeList<>;
179 CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots);
180 CountHelper(CountHelper &&) = default;
181 CountHelper(const CountHelper &) = delete;
182 void InitTask(TTreeReader *, unsigned int) {}
183 void Exec(unsigned int slot);
184 void Initialize() { /* noop */}
185 void Finalize();
186
187 // Helper functions for RMergeableValue
188 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
189 {
190 return std::make_unique<RMergeableCount>(*fResultCount);
191 }
192
193 ULong64_t &PartialUpdate(unsigned int slot);
194
195 std::string GetActionName() { return "Count"; }
196
197 CountHelper MakeNew(void *newResult)
198 {
199 auto &result = *static_cast<std::shared_ptr<ULong64_t> *>(newResult);
200 return CountHelper(result, fCounts.size());
201 }
202};
203
204template <typename RNode_t>
205class R__CLING_PTRCHECK(off) ReportHelper : public RActionImpl<ReportHelper<RNode_t>> {
206 std::shared_ptr<RCutFlowReport> fReport;
207 /// Non-owning pointer, never null. As usual, the node is owned by its children nodes (and therefore indirectly by
208 /// the RAction corresponding to this action helper).
209 RNode_t *fNode;
210 bool fReturnEmptyReport;
211
212public:
213 using ColumnTypes_t = TypeList<>;
214 ReportHelper(const std::shared_ptr<RCutFlowReport> &report, RNode_t *node, bool emptyRep)
215 : fReport(report), fNode(node), fReturnEmptyReport(emptyRep){};
216 ReportHelper(ReportHelper &&) = default;
217 ReportHelper(const ReportHelper &) = delete;
218 void InitTask(TTreeReader *, unsigned int) {}
219 void Exec(unsigned int /* slot */) {}
220 void Initialize() { /* noop */}
221 void Finalize()
222 {
223 if (!fReturnEmptyReport)
224 fNode->Report(*fReport);
225 }
226
227 std::string GetActionName() { return "Report"; }
228
229 // TODO implement MakeNew. Requires some smartness in passing the appropriate previous node.
230};
231
232/// This helper fills TH1Ds for which no axes were specified by buffering the fill values to pick good axes limits.
233///
234/// TH1Ds have an automatic mechanism to pick good limits based on the first N entries they were filled with, but
235/// that does not work in multi-thread event loops as it might yield histograms with incompatible binning in each
236/// thread, making it impossible to merge the per-thread results.
237/// Instead, this helper delays the decision on the axes limits until all threads have done processing, synchronizing
238/// the decision on the limits as part of the merge operation.
239class R__CLING_PTRCHECK(off) BufferedFillHelper : public RActionImpl<BufferedFillHelper> {
240 // this sets a total initial size of 16 MB for the buffers (can increase)
241 static constexpr unsigned int fgTotalBufSize = 2097152;
242 using BufEl_t = double;
243 using Buf_t = std::vector<BufEl_t>;
244
245 std::vector<Buf_t> fBuffers;
246 std::vector<Buf_t> fWBuffers;
247 std::shared_ptr<Hist_t> fResultHist;
248 unsigned int fNSlots;
249 unsigned int fBufSize;
250 /// Histograms containing "snapshots" of partial results. Non-null only if a registered callback requires it.
251 Results<std::unique_ptr<Hist_t>> fPartialHists;
252 Buf_t fMin;
253 Buf_t fMax;
254
255 void UpdateMinMax(unsigned int slot, double v);
256
257public:
258 BufferedFillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots);
259 BufferedFillHelper(BufferedFillHelper &&) = default;
260 BufferedFillHelper(const BufferedFillHelper &) = delete;
261 void InitTask(TTreeReader *, unsigned int) {}
262 void Exec(unsigned int slot, double v);
263 void Exec(unsigned int slot, double v, double w);
264
265 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
266 void Exec(unsigned int slot, const T &vs)
267 {
268 auto &thisBuf = fBuffers[slot];
269 // range-based for results in warnings on some compilers due to vector<bool>'s custom reference type
270 for (auto v = vs.begin(); v != vs.end(); ++v) {
271 UpdateMinMax(slot, *v);
272 thisBuf.emplace_back(*v); // TODO: Can be optimised in case T == BufEl_t
273 }
274 }
275
276 template <typename T, typename W, std::enable_if_t<IsDataContainer<T>::value && IsDataContainer<W>::value, int> = 0>
277 void Exec(unsigned int slot, const T &vs, const W &ws)
278 {
279 auto &thisBuf = fBuffers[slot];
280
281 for (auto &v : vs) {
282 UpdateMinMax(slot, v);
283 thisBuf.emplace_back(v);
284 }
285
286 auto &thisWBuf = fWBuffers[slot];
287 for (auto &w : ws) {
288 thisWBuf.emplace_back(w); // TODO: Can be optimised in case T == BufEl_t
289 }
290 }
291
292 template <typename T, typename W, std::enable_if_t<IsDataContainer<T>::value && !IsDataContainer<W>::value, int> = 0>
293 void Exec(unsigned int slot, const T &vs, const W w)
294 {
295 auto &thisBuf = fBuffers[slot];
296 for (auto &v : vs) {
297 UpdateMinMax(slot, v);
298 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
299 }
300
301 auto &thisWBuf = fWBuffers[slot];
302 thisWBuf.insert(thisWBuf.end(), vs.size(), w);
303 }
304
305 template <typename T, typename W, std::enable_if_t<IsDataContainer<W>::value && !IsDataContainer<T>::value, int> = 0>
306 void Exec(unsigned int slot, const T v, const W &ws)
307 {
308 UpdateMinMax(slot, v);
309 auto &thisBuf = fBuffers[slot];
310 thisBuf.insert(thisBuf.end(), ws.size(), v);
311
312 auto &thisWBuf = fWBuffers[slot];
313 thisWBuf.insert(thisWBuf.end(), ws.begin(), ws.end());
314 }
315
316 Hist_t &PartialUpdate(unsigned int);
317
318 void Initialize() { /* noop */}
319
320 void Finalize();
321
322 // Helper functions for RMergeableValue
323 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
324 {
325 return std::make_unique<RMergeableFill<Hist_t>>(*fResultHist);
326 }
327
328 std::string GetActionName()
329 {
330 return std::string(fResultHist->IsA()->GetName()) + "\\n" + std::string(fResultHist->GetName());
331 }
332
333 BufferedFillHelper MakeNew(void *newResult)
334 {
335 auto &result = *static_cast<std::shared_ptr<Hist_t> *>(newResult);
336 result->Reset();
337 result->SetDirectory(nullptr);
338 return BufferedFillHelper(result, fNSlots);
339 }
340};
341
342/// The generic Fill helper: it calls Fill on per-thread objects and then Merge to produce a final result.
343/// For one-dimensional histograms, if no axes are specified, RDataFrame uses BufferedFillHelper instead.
344template <typename HIST = Hist_t>
345class R__CLING_PTRCHECK(off) FillHelper : public RActionImpl<FillHelper<HIST>> {
346 std::vector<HIST *> fObjects;
347
348 template <typename H = HIST, typename = decltype(std::declval<H>().Reset())>
349 void ResetIfPossible(H *h)
350 {
351 h->Reset();
352 }
353
354 void ResetIfPossible(TStatistic *h) { *h = TStatistic(); }
355
356 // cannot safely re-initialize variations of the result, hence error out
357 void ResetIfPossible(...)
358 {
359 throw std::runtime_error(
360 "A systematic variation was requested for a custom Fill action, but the type of the object to be filled does "
361 "not implement a Reset method, so we cannot safely re-initialize variations of the result. Aborting.");
362 }
363
364 void UnsetDirectoryIfPossible(TH1 *h) {
365 h->SetDirectory(nullptr);
366 }
367
368 void UnsetDirectoryIfPossible(...) {}
369
370 // Merge overload for types with Merge(TCollection*), like TH1s
371 template <typename H, typename = std::enable_if_t<std::is_base_of<TObject, H>::value, int>>
372 auto Merge(std::vector<H *> &objs, int /*toincreaseoverloadpriority*/)
373 -> decltype(objs[0]->Merge((TCollection *)nullptr), void())
374 {
375 TList l;
376 for (auto it = ++objs.begin(); it != objs.end(); ++it)
377 l.Add(*it);
378 objs[0]->Merge(&l);
379 }
380
381 // Merge overload for types with Merge(const std::vector&)
382 template <typename H>
383 auto Merge(std::vector<H *> &objs, double /*toloweroverloadpriority*/)
384 -> decltype(objs[0]->Merge(std::vector<HIST *>{}), void())
385 {
386 objs[0]->Merge({++objs.begin(), objs.end()});
387 }
388
389 // Merge overload to error out in case no valid HIST::Merge method was detected
390 template <typename T>
391 void Merge(T, ...)
392 {
393 static_assert(sizeof(T) < 0,
394 "The type passed to Fill does not provide a Merge(TCollection*) or Merge(const std::vector&) method.");
395 }
396
397 // class which wraps a pointer and implements a no-op increment operator
398 template <typename T>
399 class ScalarConstIterator {
400 const T *obj_;
401
402 public:
403 ScalarConstIterator(const T *obj) : obj_(obj) {}
404 const T &operator*() const { return *obj_; }
405 ScalarConstIterator<T> &operator++() { return *this; }
406 };
407
408 // helper functions which provide one implementation for scalar types and another for containers
409 // TODO these could probably all be replaced by inlined lambdas and/or constexpr if statements
410 // in c++17 or later
411
412 // return unchanged value for scalar
413 template <typename T, std::enable_if_t<!IsDataContainer<T>::value, int> = 0>
414 ScalarConstIterator<T> MakeBegin(const T &val)
415 {
416 return ScalarConstIterator<T>(&val);
417 }
418
419 // return iterator to beginning of container
420 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
421 auto MakeBegin(const T &val)
422 {
423 return std::begin(val);
424 }
425
426 // return 1 for scalars
427 template <typename T, std::enable_if_t<!IsDataContainer<T>::value, int> = 0>
428 std::size_t GetSize(const T &)
429 {
430 return 1;
431 }
432
433 // return container size
434 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
435 std::size_t GetSize(const T &val)
436 {
437#if __cplusplus >= 201703L
438 return std::size(val);
439#else
440 return val.size();
441#endif
442 }
443
444 template <std::size_t ColIdx, typename End_t, typename... Its>
445 void ExecLoop(unsigned int slot, End_t end, Its... its)
446 {
447 auto *thisSlotH = fObjects[slot];
448 // loop increments all of the iterators while leaving scalars unmodified
449 // TODO this could be simplified with fold expressions or std::apply in C++17
450 auto nop = [](auto &&...) {};
451 for (; GetNthElement<ColIdx>(its...) != end; nop(++its...)) {
452 thisSlotH->Fill(*its...);
453 }
454 }
455
456public:
457 FillHelper(FillHelper &&) = default;
458 FillHelper(const FillHelper &) = delete;
459
460 FillHelper(const std::shared_ptr<HIST> &h, const unsigned int nSlots) : fObjects(nSlots, nullptr)
461 {
462 fObjects[0] = h.get();
463 // Initialize all other slots
464 for (unsigned int i = 1; i < nSlots; ++i) {
465 fObjects[i] = new HIST(*fObjects[0]);
466 UnsetDirectoryIfPossible(fObjects[i]);
467 }
468 }
469
470 void InitTask(TTreeReader *, unsigned int) {}
471
472 // no container arguments
473 template <typename... ValTypes, std::enable_if_t<!Disjunction<IsDataContainer<ValTypes>...>::value, int> = 0>
474 auto Exec(unsigned int slot, const ValTypes &...x) -> decltype(fObjects[slot]->Fill(x...), void())
475 {
476 fObjects[slot]->Fill(x...);
477 }
478
479 // at least one container argument
480 template <typename... Xs, std::enable_if_t<Disjunction<IsDataContainer<Xs>...>::value, int> = 0>
481 auto Exec(unsigned int slot, const Xs &...xs) -> decltype(fObjects[slot]->Fill(*MakeBegin(xs)...), void())
482 {
483 // array of bools keeping track of which inputs are containers
484 constexpr std::array<bool, sizeof...(Xs)> isContainer{IsDataContainer<Xs>::value...};
485
486 // index of the first container input
487 constexpr std::size_t colidx = FindIdxTrue(isContainer);
488 // if this happens, there is a bug in the implementation
489 static_assert(colidx < sizeof...(Xs), "Error: index of collection-type argument not found.");
490
491 // get the end iterator to the first container
492 auto const xrefend = std::end(GetNthElement<colidx>(xs...));
493
494 // array of container sizes (1 for scalars)
495 std::array<std::size_t, sizeof...(xs)> sizes = {{GetSize(xs)...}};
496
497 for (std::size_t i = 0; i < sizeof...(xs); ++i) {
498 if (isContainer[i] && sizes[i] != sizes[colidx]) {
499 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
500 }
501 }
502
503 ExecLoop<colidx>(slot, xrefend, MakeBegin(xs)...);
504 }
505
506 template <typename T = HIST>
507 void Exec(...)
508 {
509 static_assert(sizeof(T) < 0,
510 "When filling an object with RDataFrame (e.g. via a Fill action) the number or types of the "
511 "columns passed did not match the signature of the object's `Fill` method.");
512 }
513
514 void Initialize() { /* noop */}
515
516 void Finalize()
517 {
518 if (fObjects.size() == 1)
519 return;
520
521 Merge(fObjects, /*toselectcorrectoverload=*/0);
522
523 // delete the copies we created for the slots other than the first
524 for (auto it = ++fObjects.begin(); it != fObjects.end(); ++it)
525 delete *it;
526 }
527
528 HIST &PartialUpdate(unsigned int slot) { return *fObjects[slot]; }
529
530 // Helper functions for RMergeableValue
531 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
532 {
533 return std::make_unique<RMergeableFill<HIST>>(*fObjects[0]);
534 }
535
536 // if the fObjects vector type is derived from TObject, return the name of the object
537 template <typename T = HIST, std::enable_if_t<std::is_base_of<TObject, T>::value, int> = 0>
538 std::string GetActionName()
539 {
540 return std::string(fObjects[0]->IsA()->GetName()) + "\\n" + std::string(fObjects[0]->GetName());
541 }
542
543 // if fObjects is not derived from TObject, indicate it is some other object
544 template <typename T = HIST, std::enable_if_t<!std::is_base_of<TObject, T>::value, int> = 0>
545 std::string GetActionName()
546 {
547 return "Fill custom object";
548 }
549
550 template <typename H = HIST>
551 FillHelper MakeNew(void *newResult)
552 {
553 auto &result = *static_cast<std::shared_ptr<H> *>(newResult);
554 ResetIfPossible(result.get());
555 UnsetDirectoryIfPossible(result.get());
556 return FillHelper(result, fObjects.size());
557 }
558};
559
560class R__CLING_PTRCHECK(off) FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> {
561public:
562 using Result_t = ::TGraph;
563
564private:
565 std::vector<::TGraph *> fGraphs;
566
567public:
568 FillTGraphHelper(FillTGraphHelper &&) = default;
569 FillTGraphHelper(const FillTGraphHelper &) = delete;
570
571 FillTGraphHelper(const std::shared_ptr<::TGraph> &g, const unsigned int nSlots) : fGraphs(nSlots, nullptr)
572 {
573 fGraphs[0] = g.get();
574 // Initialize all other slots
575 for (unsigned int i = 1; i < nSlots; ++i) {
576 fGraphs[i] = new TGraph(*fGraphs[0]);
577 }
578 }
579
580 void Initialize() {}
581 void InitTask(TTreeReader *, unsigned int) {}
582
583 // case: both types are container types
584 template <typename X0, typename X1,
585 std::enable_if_t<IsDataContainer<X0>::value && IsDataContainer<X1>::value, int> = 0>
586 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
587 {
588 if (x0s.size() != x1s.size()) {
589 throw std::runtime_error("Cannot fill Graph with values in containers of different sizes.");
590 }
591 auto *thisSlotG = fGraphs[slot];
592 auto x0sIt = std::begin(x0s);
593 const auto x0sEnd = std::end(x0s);
594 auto x1sIt = std::begin(x1s);
595 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
596 thisSlotG->SetPoint(thisSlotG->GetN(), *x0sIt, *x1sIt);
597 }
598 }
599
600 // case: both types are non-container types, e.g. scalars
601 template <typename X0, typename X1,
602 std::enable_if_t<!IsDataContainer<X0>::value && !IsDataContainer<X1>::value, int> = 0>
603 void Exec(unsigned int slot, X0 x0, X1 x1)
604 {
605 auto thisSlotG = fGraphs[slot];
606 thisSlotG->SetPoint(thisSlotG->GetN(), x0, x1);
607 }
608
609 // case: types are combination of containers and non-containers
610 // this is not supported, error out
611 template <typename X0, typename X1, typename... ExtraArgsToLowerPriority>
612 void Exec(unsigned int, X0, X1, ExtraArgsToLowerPriority...)
613 {
614 throw std::runtime_error("Graph was applied to a mix of scalar values and collections. This is not supported.");
615 }
616
617 void Finalize()
618 {
619 const auto nSlots = fGraphs.size();
620 auto resGraph = fGraphs[0];
621 TList l;
622 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
623 for (unsigned int slot = 1; slot < nSlots; ++slot) {
624 l.Add(fGraphs[slot]);
625 }
626 resGraph->Merge(&l);
627 }
628
629 // Helper functions for RMergeableValue
630 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
631 {
632 return std::make_unique<RMergeableFill<Result_t>>(*fGraphs[0]);
633 }
634
635 std::string GetActionName() { return "Graph"; }
636
637 Result_t &PartialUpdate(unsigned int slot) { return *fGraphs[slot]; }
638
639 FillTGraphHelper MakeNew(void *newResult)
640 {
641 auto &result = *static_cast<std::shared_ptr<TGraph> *>(newResult);
642 result->Set(0);
643 return FillTGraphHelper(result, fGraphs.size());
644 }
645};
646
647class R__CLING_PTRCHECK(off) FillTGraphAsymmErrorsHelper
648 : public ROOT::Detail::RDF::RActionImpl<FillTGraphAsymmErrorsHelper> {
649public:
650 using Result_t = ::TGraphAsymmErrors;
651
652private:
653 std::vector<::TGraphAsymmErrors *> fGraphAsymmErrors;
654
655public:
656 FillTGraphAsymmErrorsHelper(FillTGraphAsymmErrorsHelper &&) = default;
657 FillTGraphAsymmErrorsHelper(const FillTGraphAsymmErrorsHelper &) = delete;
658
659 FillTGraphAsymmErrorsHelper(const std::shared_ptr<::TGraphAsymmErrors> &g, const unsigned int nSlots)
660 : fGraphAsymmErrors(nSlots, nullptr)
661 {
662 fGraphAsymmErrors[0] = g.get();
663 // Initialize all other slots
664 for (unsigned int i = 1; i < nSlots; ++i) {
665 fGraphAsymmErrors[i] = new TGraphAsymmErrors(*fGraphAsymmErrors[0]);
666 }
667 }
668
669 void Initialize() {}
670 void InitTask(TTreeReader *, unsigned int) {}
671
672 // case: all types are container types
673 template <
674 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
675 std::enable_if_t<IsDataContainer<X>::value && IsDataContainer<Y>::value && IsDataContainer<EXL>::value &&
676 IsDataContainer<EXH>::value && IsDataContainer<EYL>::value && IsDataContainer<EYH>::value,
677 int> = 0>
678 void
679 Exec(unsigned int slot, const X &xs, const Y &ys, const EXL &exls, const EXH &exhs, const EYL &eyls, const EYH &eyhs)
680 {
681 if ((xs.size() != ys.size()) || (xs.size() != exls.size()) || (xs.size() != exhs.size()) ||
682 (xs.size() != eyls.size()) || (xs.size() != eyhs.size())) {
683 throw std::runtime_error("Cannot fill GraphAsymmErrors with values in containers of different sizes.");
684 }
685 auto *thisSlotG = fGraphAsymmErrors[slot];
686 auto xsIt = std::begin(xs);
687 auto ysIt = std::begin(ys);
688 auto exlsIt = std::begin(exls);
689 auto exhsIt = std::begin(exhs);
690 auto eylsIt = std::begin(eyls);
691 auto eyhsIt = std::begin(eyhs);
692 while (xsIt != std::end(xs)) {
693 const auto n = thisSlotG->GetN(); // must use the same `n` for SetPoint and SetPointError
694 thisSlotG->SetPoint(n, *xsIt++, *ysIt++);
695 thisSlotG->SetPointError(n, *exlsIt++, *exhsIt++, *eylsIt++, *eyhsIt++);
696 }
697 }
698
699 // case: all types are non-container types, e.g. scalars
700 template <
701 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
702 std::enable_if_t<!IsDataContainer<X>::value && !IsDataContainer<Y>::value && !IsDataContainer<EXL>::value &&
703 !IsDataContainer<EXH>::value && !IsDataContainer<EYL>::value && !IsDataContainer<EYH>::value,
704 int> = 0>
705 void Exec(unsigned int slot, X x, Y y, EXL exl, EXH exh, EYL eyl, EYH eyh)
706 {
707 auto thisSlotG = fGraphAsymmErrors[slot];
708 const auto n = thisSlotG->GetN();
709 thisSlotG->SetPoint(n, x, y);
710 thisSlotG->SetPointError(n, exl, exh, eyl, eyh);
711 }
712
713 // case: types are combination of containers and non-containers
714 // this is not supported, error out
715 template <typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
716 typename... ExtraArgsToLowerPriority>
717 void Exec(unsigned int, X, Y, EXL, EXH, EYL, EYH, ExtraArgsToLowerPriority...)
718 {
719 throw std::runtime_error(
720 "GraphAsymmErrors was applied to a mix of scalar values and collections. This is not supported.");
721 }
722
723 void Finalize()
724 {
725 const auto nSlots = fGraphAsymmErrors.size();
726 auto resGraphAsymmErrors = fGraphAsymmErrors[0];
727 TList l;
728 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
729 for (unsigned int slot = 1; slot < nSlots; ++slot) {
730 l.Add(fGraphAsymmErrors[slot]);
731 }
732 resGraphAsymmErrors->Merge(&l);
733 }
734
735 // Helper functions for RMergeableValue
736 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
737 {
738 return std::make_unique<RMergeableFill<Result_t>>(*fGraphAsymmErrors[0]);
739 }
740
741 std::string GetActionName() { return "GraphAsymmErrors"; }
742
743 Result_t &PartialUpdate(unsigned int slot) { return *fGraphAsymmErrors[slot]; }
744
745 FillTGraphAsymmErrorsHelper MakeNew(void *newResult)
746 {
747 auto &result = *static_cast<std::shared_ptr<TGraphAsymmErrors> *>(newResult);
748 result->Set(0);
749 return FillTGraphAsymmErrorsHelper(result, fGraphAsymmErrors.size());
750 }
751};
752
753// In case of the take helper we have 4 cases:
754// 1. The column is not an RVec, the collection is not a vector
755// 2. The column is not an RVec, the collection is a vector
756// 3. The column is an RVec, the collection is not a vector
757// 4. The column is an RVec, the collection is a vector
758
/// Append `v` to the collection `c`, constructing the new element in place.
/// `v` is a forwarding reference and is perfectly forwarded to emplace_back: lvalues are copied into the
/// collection, rvalues are moved into it (which also makes move-only element types usable).
template <typename V, typename COLL>
void FillColl(V &&v, COLL &c)
{
   c.emplace_back(std::forward<V>(v));
}
763
// bool values are appended with push_back, since some compilers do not support emplace_back for them.
template <typename COLL>
void FillColl(bool v, COLL &c)
{
   c.push_back(v);
}
769
770// Case 1.: The column is not an RVec, the collection is not a vector
771// No optimisations, no transformations: just copies.
772template <typename RealT_t, typename T, typename COLL>
773class R__CLING_PTRCHECK(off) TakeHelper : public RActionImpl<TakeHelper<RealT_t, T, COLL>> {
774 Results<std::shared_ptr<COLL>> fColls;
775
776public:
777 using ColumnTypes_t = TypeList<T>;
778 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
779 {
780 fColls.emplace_back(resultColl);
781 for (unsigned int i = 1; i < nSlots; ++i)
782 fColls.emplace_back(std::make_shared<COLL>());
783 }
784 TakeHelper(TakeHelper &&);
785 TakeHelper(const TakeHelper &) = delete;
786
787 void InitTask(TTreeReader *, unsigned int) {}
788
789 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
790
791 void Initialize() { /* noop */}
792
793 void Finalize()
794 {
795 auto rColl = fColls[0];
796 for (unsigned int i = 1; i < fColls.size(); ++i) {
797 const auto &coll = fColls[i];
798 const auto end = coll->end();
799 // Use an explicit loop here to prevent compiler warnings introduced by
800 // clang's range-based loop analysis and vector<bool> references.
801 for (auto j = coll->begin(); j != end; j++) {
802 FillColl(*j, *rColl);
803 }
804 }
805 }
806
807 COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); }
808
809 std::string GetActionName() { return "Take"; }
810
811 TakeHelper MakeNew(void *newResult)
812 {
813 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
814 result->clear();
815 return TakeHelper(result, fColls.size());
816 }
817};
818
819// Case 2.: The column is not an RVec, the collection is a vector
820// Optimisations, no transformations: just copies.
821template <typename RealT_t, typename T>
822class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, T, std::vector<T>>
823 : public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
824 Results<std::shared_ptr<std::vector<T>>> fColls;
825
826public:
827 using ColumnTypes_t = TypeList<T>;
828 TakeHelper(const std::shared_ptr<std::vector<T>> &resultColl, const unsigned int nSlots)
829 {
830 fColls.emplace_back(resultColl);
831 for (unsigned int i = 1; i < nSlots; ++i) {
832 auto v = std::make_shared<std::vector<T>>();
833 v->reserve(1024);
834 fColls.emplace_back(v);
835 }
836 }
837 TakeHelper(TakeHelper &&);
838 TakeHelper(const TakeHelper &) = delete;
839
840 void InitTask(TTreeReader *, unsigned int) {}
841
842 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
843
844 void Initialize() { /* noop */}
845
846 // This is optimised to treat vectors
847 void Finalize()
848 {
849 ULong64_t totSize = 0;
850 for (auto &coll : fColls)
851 totSize += coll->size();
852 auto rColl = fColls[0];
853 rColl->reserve(totSize);
854 for (unsigned int i = 1; i < fColls.size(); ++i) {
855 auto &coll = fColls[i];
856 rColl->insert(rColl->end(), coll->begin(), coll->end());
857 }
858 }
859
860 std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; }
861
862 std::string GetActionName() { return "Take"; }
863
864 TakeHelper MakeNew(void *newResult)
865 {
866 auto &result = *static_cast<std::shared_ptr<std::vector<T>> *>(newResult);
867 result->clear();
868 return TakeHelper(result, fColls.size());
869 }
870};
871
872// Case 3.: The column is a RVec, the collection is not a vector
873// No optimisations, transformations from RVecs to vectors
874template <typename RealT_t, typename COLL>
875class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, RVec<RealT_t>, COLL>
876 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
877 Results<std::shared_ptr<COLL>> fColls;
878
879public:
880 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
881 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
882 {
883 fColls.emplace_back(resultColl);
884 for (unsigned int i = 1; i < nSlots; ++i)
885 fColls.emplace_back(std::make_shared<COLL>());
886 }
887 TakeHelper(TakeHelper &&);
888 TakeHelper(const TakeHelper &) = delete;
889
890 void InitTask(TTreeReader *, unsigned int) {}
891
892 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
893
894 void Initialize() { /* noop */}
895
896 void Finalize()
897 {
898 auto rColl = fColls[0];
899 for (unsigned int i = 1; i < fColls.size(); ++i) {
900 auto &coll = fColls[i];
901 for (auto &v : *coll) {
902 rColl->emplace_back(v);
903 }
904 }
905 }
906
907 std::string GetActionName() { return "Take"; }
908
909 TakeHelper MakeNew(void *newResult)
910 {
911 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
912 result->clear();
913 return TakeHelper(result, fColls.size());
914 }
915};
916
917// Case 4.: The column is an RVec, the collection is a vector
918// Optimisations, transformations from RVecs to vectors
919template <typename RealT_t>
920class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>
921 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
922
923 Results<std::shared_ptr<std::vector<std::vector<RealT_t>>>> fColls;
924
925public:
926 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
927 TakeHelper(const std::shared_ptr<std::vector<std::vector<RealT_t>>> &resultColl, const unsigned int nSlots)
928 {
929 fColls.emplace_back(resultColl);
930 for (unsigned int i = 1; i < nSlots; ++i) {
931 auto v = std::make_shared<std::vector<RealT_t>>();
932 v->reserve(1024);
933 fColls.emplace_back(v);
934 }
935 }
936 TakeHelper(TakeHelper &&);
937 TakeHelper(const TakeHelper &) = delete;
938
939 void InitTask(TTreeReader *, unsigned int) {}
940
941 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
942
943 void Initialize() { /* noop */}
944
945 // This is optimised to treat vectors
946 void Finalize()
947 {
948 ULong64_t totSize = 0;
949 for (auto &coll : fColls)
950 totSize += coll->size();
951 auto rColl = fColls[0];
952 rColl->reserve(totSize);
953 for (unsigned int i = 1; i < fColls.size(); ++i) {
954 auto &coll = fColls[i];
955 rColl->insert(rColl->end(), coll->begin(), coll->end());
956 }
957 }
958
959 std::string GetActionName() { return "Take"; }
960
961 TakeHelper MakeNew(void *newResult)
962 {
963 auto &result = *static_cast<typename decltype(fColls)::value_type *>(newResult);
964 result->clear();
965 return TakeHelper(result, fColls.size());
966 }
967};
968
969// Extern templates for TakeHelper
970// NOTE: The move-constructor of specializations declared as extern templates
971// must be defined out of line, otherwise cling fails to find its symbol.
972template <typename RealT_t, typename T, typename COLL>
973TakeHelper<RealT_t, T, COLL>::TakeHelper(TakeHelper<RealT_t, T, COLL> &&) = default;
974template <typename RealT_t, typename T>
975TakeHelper<RealT_t, T, std::vector<T>>::TakeHelper(TakeHelper<RealT_t, T, std::vector<T>> &&) = default;
976template <typename RealT_t, typename COLL>
977TakeHelper<RealT_t, RVec<RealT_t>, COLL>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, COLL> &&) = default;
978template <typename RealT_t>
979TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>> &&) = default;
980
981// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
982#if __GNUC__ > 5
983extern template class TakeHelper<bool, bool, std::vector<bool>>;
984extern template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
985extern template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
986extern template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
987extern template class TakeHelper<int, int, std::vector<int>>;
988extern template class TakeHelper<long, long, std::vector<long>>;
989extern template class TakeHelper<long long, long long, std::vector<long long>>;
990extern template class TakeHelper<float, float, std::vector<float>>;
991extern template class TakeHelper<double, double, std::vector<double>>;
992#endif
993
994template <typename ResultType>
995class R__CLING_PTRCHECK(off) MinHelper : public RActionImpl<MinHelper<ResultType>> {
996 std::shared_ptr<ResultType> fResultMin;
997 Results<ResultType> fMins;
998
999public:
1000 MinHelper(MinHelper &&) = default;
1001 MinHelper(const std::shared_ptr<ResultType> &minVPtr, const unsigned int nSlots)
1002 : fResultMin(minVPtr), fMins(nSlots, std::numeric_limits<ResultType>::max())
1003 {
1004 }
1005
1006 void Exec(unsigned int slot, ResultType v) { fMins[slot] = std::min(v, fMins[slot]); }
1007
1008 void InitTask(TTreeReader *, unsigned int) {}
1009
1010 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1011 void Exec(unsigned int slot, const T &vs)
1012 {
1013 for (auto &&v : vs)
1014 fMins[slot] = std::min(static_cast<ResultType>(v), fMins[slot]);
1015 }
1016
1017 void Initialize() { /* noop */}
1018
1019 void Finalize()
1020 {
1021 *fResultMin = std::numeric_limits<ResultType>::max();
1022 for (auto &m : fMins)
1023 *fResultMin = std::min(m, *fResultMin);
1024 }
1025
1026 // Helper functions for RMergeableValue
1027 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1028 {
1029 return std::make_unique<RMergeableMin<ResultType>>(*fResultMin);
1030 }
1031
1032 ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; }
1033
1034 std::string GetActionName() { return "Min"; }
1035
1036 MinHelper MakeNew(void *newResult)
1037 {
1038 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1039 return MinHelper(result, fMins.size());
1040 }
1041};
1042
1043// TODO
1044// extern template void MinHelper::Exec(unsigned int, const std::vector<float> &);
1045// extern template void MinHelper::Exec(unsigned int, const std::vector<double> &);
1046// extern template void MinHelper::Exec(unsigned int, const std::vector<char> &);
1047// extern template void MinHelper::Exec(unsigned int, const std::vector<int> &);
1048// extern template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1049
1050template <typename ResultType>
1051class R__CLING_PTRCHECK(off) MaxHelper : public RActionImpl<MaxHelper<ResultType>> {
1052 std::shared_ptr<ResultType> fResultMax;
1053 Results<ResultType> fMaxs;
1054
1055public:
1056 MaxHelper(MaxHelper &&) = default;
1057 MaxHelper(const MaxHelper &) = delete;
1058 MaxHelper(const std::shared_ptr<ResultType> &maxVPtr, const unsigned int nSlots)
1059 : fResultMax(maxVPtr), fMaxs(nSlots, std::numeric_limits<ResultType>::lowest())
1060 {
1061 }
1062
1063 void InitTask(TTreeReader *, unsigned int) {}
1064 void Exec(unsigned int slot, ResultType v) { fMaxs[slot] = std::max(v, fMaxs[slot]); }
1065
1066 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1067 void Exec(unsigned int slot, const T &vs)
1068 {
1069 for (auto &&v : vs)
1070 fMaxs[slot] = std::max(static_cast<ResultType>(v), fMaxs[slot]);
1071 }
1072
1073 void Initialize() { /* noop */}
1074
1075 void Finalize()
1076 {
1077 *fResultMax = std::numeric_limits<ResultType>::lowest();
1078 for (auto &m : fMaxs) {
1079 *fResultMax = std::max(m, *fResultMax);
1080 }
1081 }
1082
1083 // Helper functions for RMergeableValue
1084 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1085 {
1086 return std::make_unique<RMergeableMax<ResultType>>(*fResultMax);
1087 }
1088
1089 ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; }
1090
1091 std::string GetActionName() { return "Max"; }
1092
1093 MaxHelper MakeNew(void *newResult)
1094 {
1095 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1096 return MaxHelper(result, fMaxs.size());
1097 }
1098};
1099
1100// TODO
1101// extern template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
1102// extern template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
1103// extern template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
1104// extern template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
1105// extern template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1106
1107template <typename ResultType>
1108class R__CLING_PTRCHECK(off) SumHelper : public RActionImpl<SumHelper<ResultType>> {
1109 std::shared_ptr<ResultType> fResultSum;
1110 Results<ResultType> fSums;
1111 Results<ResultType> fCompensations;
1112
1113 /// Evaluate neutral element for this type and the sum operation.
1114 /// This is assumed to be any_value - any_value if operator- is defined
1115 /// for the type, otherwise a default-constructed ResultType{} is used.
1116 template <typename T = ResultType>
1117 auto NeutralElement(const T &v, int /*overloadresolver*/) -> decltype(v - v)
1118 {
1119 return v - v;
1120 }
1121
1122 template <typename T = ResultType, typename Dummy = int>
1123 ResultType NeutralElement(const T &, Dummy) // this overload has lower priority thanks to the template arg
1124 {
1125 return ResultType{};
1126 }
1127
1128public:
1129 SumHelper(SumHelper &&) = default;
1130 SumHelper(const SumHelper &) = delete;
1131 SumHelper(const std::shared_ptr<ResultType> &sumVPtr, const unsigned int nSlots)
1132 : fResultSum(sumVPtr), fSums(nSlots, NeutralElement(*sumVPtr, -1)),
1133 fCompensations(nSlots, NeutralElement(*sumVPtr, -1))
1134 {
1135 }
1136 void InitTask(TTreeReader *, unsigned int) {}
1137
1138 void Exec(unsigned int slot, ResultType x)
1139 {
1140 // Kahan Sum:
1141 ResultType y = x - fCompensations[slot];
1142 ResultType t = fSums[slot] + y;
1143 fCompensations[slot] = (t - fSums[slot]) - y;
1144 fSums[slot] = t;
1145 }
1146
1147 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1148 void Exec(unsigned int slot, const T &vs)
1149 {
1150 for (auto &&v : vs) {
1151 Exec(slot, v);
1152 }
1153 }
1154
1155 void Initialize() { /* noop */}
1156
1157 void Finalize()
1158 {
1159 ResultType sum(NeutralElement(ResultType{}, -1));
1160 ResultType compensation(NeutralElement(ResultType{}, -1));
1161 ResultType y(NeutralElement(ResultType{}, -1));
1162 ResultType t(NeutralElement(ResultType{}, -1));
1163 for (auto &m : fSums) {
1164 // Kahan Sum:
1165 y = m - compensation;
1166 t = sum + y;
1167 compensation = (t - sum) - y;
1168 sum = t;
1169 }
1170 *fResultSum += sum;
1171 }
1172
1173 // Helper functions for RMergeableValue
1174 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1175 {
1176 return std::make_unique<RMergeableSum<ResultType>>(*fResultSum);
1177 }
1178
1179 ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; }
1180
1181 std::string GetActionName() { return "Sum"; }
1182
1183 SumHelper MakeNew(void *newResult)
1184 {
1185 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1186 *result = NeutralElement(*result, -1);
1187 return SumHelper(result, fSums.size());
1188 }
1189};
1190
1191class R__CLING_PTRCHECK(off) MeanHelper : public RActionImpl<MeanHelper> {
1192 std::shared_ptr<double> fResultMean;
1193 std::vector<ULong64_t> fCounts;
1194 std::vector<double> fSums;
1195 std::vector<double> fPartialMeans;
1196 std::vector<double> fCompensations;
1197
1198public:
1199 MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1200 MeanHelper(MeanHelper &&) = default;
1201 MeanHelper(const MeanHelper &) = delete;
1202 void InitTask(TTreeReader *, unsigned int) {}
1203 void Exec(unsigned int slot, double v);
1204
1205 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1206 void Exec(unsigned int slot, const T &vs)
1207 {
1208 for (auto &&v : vs) {
1209
1210 fCounts[slot]++;
1211 // Kahan Sum:
1212 double y = v - fCompensations[slot];
1213 double t = fSums[slot] + y;
1214 fCompensations[slot] = (t - fSums[slot]) - y;
1215 fSums[slot] = t;
1216 }
1217 }
1218
1219 void Initialize() { /* noop */}
1220
1221 void Finalize();
1222
1223 // Helper functions for RMergeableValue
1224 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1225 {
1226 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1227 return std::make_unique<RMergeableMean>(*fResultMean, counts);
1228 }
1229
1230 double &PartialUpdate(unsigned int slot);
1231
1232 std::string GetActionName() { return "Mean"; }
1233
1234 MeanHelper MakeNew(void *newResult)
1235 {
1236 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1237 return MeanHelper(result, fSums.size());
1238 }
1239};
1240
1241extern template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
1242extern template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
1243extern template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
1244extern template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
1245extern template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1246
1247class R__CLING_PTRCHECK(off) StdDevHelper : public RActionImpl<StdDevHelper> {
1248 // Number of subsets of data
1249 unsigned int fNSlots;
1250 std::shared_ptr<double> fResultStdDev;
1251 // Number of element for each slot
1252 std::vector<ULong64_t> fCounts;
1253 // Mean of each slot
1254 std::vector<double> fMeans;
1255 // Squared distance from the mean
1256 std::vector<double> fDistancesfromMean;
1257
1258public:
1259 StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1260 StdDevHelper(StdDevHelper &&) = default;
1261 StdDevHelper(const StdDevHelper &) = delete;
1262 void InitTask(TTreeReader *, unsigned int) {}
1263 void Exec(unsigned int slot, double v);
1264
1265 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1266 void Exec(unsigned int slot, const T &vs)
1267 {
1268 for (auto &&v : vs) {
1269 Exec(slot, v);
1270 }
1271 }
1272
1273 void Initialize() { /* noop */}
1274
1275 void Finalize();
1276
1277 // Helper functions for RMergeableValue
1278 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1279 {
1280 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1281 const Double_t mean =
1282 std::inner_product(fMeans.begin(), fMeans.end(), fCounts.begin(), 0.) / static_cast<Double_t>(counts);
1283 return std::make_unique<RMergeableStdDev>(*fResultStdDev, counts, mean);
1284 }
1285
1286 std::string GetActionName() { return "StdDev"; }
1287
1288 StdDevHelper MakeNew(void *newResult)
1289 {
1290 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1291 return StdDevHelper(result, fCounts.size());
1292 }
1293};
1294
1295extern template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
1296extern template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
1297extern template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
1298extern template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
1299extern template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1300
1301template <typename PrevNodeType>
1302class R__CLING_PTRCHECK(off) DisplayHelper : public RActionImpl<DisplayHelper<PrevNodeType>> {
1303private:
1305 std::shared_ptr<Display_t> fDisplayerHelper;
1306 std::shared_ptr<PrevNodeType> fPrevNode;
1307 size_t fEntriesToProcess;
1308
1309public:
1310 DisplayHelper(size_t nRows, const std::shared_ptr<Display_t> &d, const std::shared_ptr<PrevNodeType> &prevNode)
1311 : fDisplayerHelper(d), fPrevNode(prevNode), fEntriesToProcess(nRows)
1312 {
1313 }
1314 DisplayHelper(DisplayHelper &&) = default;
1315 DisplayHelper(const DisplayHelper &) = delete;
1316 void InitTask(TTreeReader *, unsigned int) {}
1317
1318 template <typename... Columns>
1319 void Exec(unsigned int, Columns &... columns)
1320 {
1321 if (fEntriesToProcess == 0)
1322 return;
1323
1324 fDisplayerHelper->AddRow(columns...);
1325 --fEntriesToProcess;
1326
1327 if (fEntriesToProcess == 0) {
1328 // No more entries to process. Send a one-time signal that this node
1329 // of the graph is done. It is important that the 'StopProcessing'
1330 // method is only called once from this helper, otherwise it would seem
1331 // like more than one operation has completed its work.
1332 fPrevNode->StopProcessing();
1333 }
1334 }
1335
1336 void Initialize() {}
1337
1338 void Finalize() {}
1339
1340 std::string GetActionName() { return "Display"; }
1341};
1342
1343template <typename T>
1344void *GetData(ROOT::VecOps::RVec<T> &v)
1345{
1346 return v.data();
1347}
1348
/// Generic overload: the argument is ignored and a null pointer is always returned.
template <typename T>
void *GetData(T & /*v*/)
{
   void *noBuffer = nullptr;
   return noBuffer;
}
1354
1355template <typename T>
1356void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &name,
1357 TBranch *&branch, void *&branchAddress, T *address, RBranchSet &outputBranches,
1358 bool /*isDefine*/)
1359{
1360 static TClassRef TBOClRef("TBranchObject");
1361
1362 TBranch *inputBranch = nullptr;
1363 if (inputTree) {
1364 inputBranch = inputTree->GetBranch(inName.c_str());
1365 if (!inputBranch) // try harder
1366 inputBranch = inputTree->FindBranch(inName.c_str());
1367 }
1368
1369 auto *outputBranch = outputBranches.Get(name);
1370 if (outputBranch) {
1371 // the output branch was already created, we just need to (re)set its address
1372 if (inputBranch && inputBranch->IsA() == TBOClRef) {
1373 outputBranch->SetAddress(reinterpret_cast<T **>(inputBranch->GetAddress()));
1374 } else if (outputBranch->IsA() != TBranch::Class()) {
1375 branchAddress = address;
1376 outputBranch->SetAddress(&branchAddress);
1377 } else {
1378 outputBranch->SetAddress(address);
1379 branchAddress = address;
1380 }
1381 return;
1382 }
1383
1384 if (inputBranch) {
1385 // Respect the original bufsize and splitlevel arguments
1386 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
1387 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
1388 // TObject branch, see https://bit.ly/2EjLMId ).
1389 const auto bufSize = inputBranch->GetBasketSize();
1390 const auto splitLevel = inputBranch->GetSplitLevel();
1391
1392 if (inputBranch->IsA() == TBOClRef) {
1393 // Need to pass a pointer to pointer
1394 outputBranch =
1395 outputTree.Branch(name.c_str(), reinterpret_cast<T **>(inputBranch->GetAddress()), bufSize, splitLevel);
1396 } else {
1397 outputBranch = outputTree.Branch(name.c_str(), address, bufSize, splitLevel);
1398 }
1399 } else {
1400 outputBranch = outputTree.Branch(name.c_str(), address);
1401 }
1402 outputBranches.Insert(name, outputBranch);
1403 // This is not an array branch, so we don't register the address of the output branch here
1404 branch = nullptr;
1405 branchAddress = nullptr;
1406}
1407
1408/// Helper function for SnapshotHelper and SnapshotHelperMT. It creates new branches for the output TTree of a Snapshot.
1409/// This overload is called for columns of type `RVec<T>`. For RDF, these can represent:
1410/// 1. c-style arrays in ROOT files, so we are sure that there are input trees to which we can ask the correct branch
1411/// title
1412/// 2. RVecs coming from a custom column or the input file/data-source
1413/// 3. vectors coming from ROOT files that are being read as RVecs
1414/// 4. TClonesArray
1415///
1416/// In case of 1., we keep aside the pointer to the branch and the pointer to the input value (in `branch` and
1417/// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch.
1418template <typename T>
1419void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &outName,
1420 TBranch *&branch, void *&branchAddress, RVec<T> *ab, RBranchSet &outputBranches, bool isDefine)
1421{
1422 TBranch *inputBranch = nullptr;
1423 if (inputTree) {
1424 inputBranch = inputTree->GetBranch(inName.c_str());
1425 if (!inputBranch) // try harder
1426 inputBranch = inputTree->FindBranch(inName.c_str());
1427 }
1428 auto *outputBranch = outputBranches.Get(outName);
1429
1430 // if no backing input branch, we must write out an RVec
1431 bool mustWriteRVec = (inputBranch == nullptr || isDefine);
1432 // otherwise, if input branch is TClonesArray, must write out an RVec
1433 if (!mustWriteRVec && std::string_view(inputBranch->GetClassName()) == "TClonesArray") {
1434 mustWriteRVec = true;
1435 Warning("Snapshot",
1436 "Branch \"%s\" contains TClonesArrays but the type specified to Snapshot was RVec<T>. The branch will "
1437 "be written out as a RVec instead of a TClonesArray. Specify that the type of the branch is "
1438 "TClonesArray as a Snapshot template parameter to write out a TClonesArray instead.",
1439 inName.c_str());
1440 }
1441 // otherwise, if input branch is a std::vector or RVec, must write out an RVec
1442 if (!mustWriteRVec) {
1443 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
1444 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
1445 mustWriteRVec = true;
1446 }
1447
1448 if (mustWriteRVec) {
1449 // Treat:
1450 // 2. RVec coming from a custom column or a source
1451 // 3. RVec coming from a column on disk of type vector (the RVec is adopting the data of that vector)
1452 // 4. TClonesArray written out as RVec<T>
1453 if (outputBranch) {
1454 // needs to be SetObject (not SetAddress) to mimic what happens when this TBranchElement is constructed
1455 outputBranch->SetObject(ab);
1456 } else {
1457 auto *b = outputTree.Branch(outName.c_str(), ab);
1458 outputBranches.Insert(outName, b);
1459 }
1460 return;
1461 }
1462
1463 // else this must be a C-array, aka case 1.
1464 auto dataPtr = ab->data();
1465
1466 if (outputBranch) {
1467 if (outputBranch->IsA() != TBranch::Class()) {
1468 branchAddress = dataPtr;
1469 outputBranch->SetAddress(&branchAddress);
1470 } else {
1471 outputBranch->SetAddress(dataPtr);
1472 }
1473 } else {
1474 // must construct the leaflist for the output branch and create the branch in the output tree
1475 auto *const leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
1476 const auto bname = leaf->GetName();
1477 auto *sizeLeaf = leaf->GetLeafCount();
1478 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
1479
1480 if (sizeLeaf && !outputBranches.Get(sizeLeafName)) {
1481 // The output array branch `bname` has dynamic size stored in leaf `sizeLeafName`, but that leaf has not been
1482 // added to the output tree yet. However, the size leaf has to be available for the creation of the array
1483 // branch to be successful. So we create the size leaf here.
1484 const auto sizeTypeStr = TypeName2ROOTTypeName(sizeLeaf->GetTypeName());
1485 const auto sizeBufSize = sizeLeaf->GetBranch()->GetBasketSize();
1486 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for `sizeLeafName`
1487 auto *sizeBranch = outputTree.Branch(sizeLeafName.c_str(), (void *)nullptr,
1488 (sizeLeafName + '/' + sizeTypeStr).c_str(), sizeBufSize);
1489 outputBranches.Insert(sizeLeafName, sizeBranch);
1490 }
1491
1492 const auto btype = leaf->GetTypeName();
1493 const auto rootbtype = TypeName2ROOTTypeName(btype);
1494 if (rootbtype == ' ') {
1495 Warning("Snapshot",
1496 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. This "
1497 "column will not be written out.",
1498 bname);
1499 } else {
1500 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
1501 outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str());
1502 outputBranch->SetTitle(inputBranch->GetTitle());
1503 outputBranches.Insert(outName, outputBranch);
1504 branch = outputBranch;
1505 branchAddress = ab->data();
1506 }
1507 }
1508}
1509
1510void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName);
1511
1512/// Helper object for a single-thread Snapshot action
1513template <typename... ColTypes>
1514class R__CLING_PTRCHECK(off) SnapshotHelper : public RActionImpl<SnapshotHelper<ColTypes...>> {
1515 std::string fFileName;
1516 std::string fDirName;
1517 std::string fTreeName;
1518 RSnapshotOptions fOptions;
1519 std::unique_ptr<TFile> fOutputFile;
1520 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
1521 bool fBranchAddressesNeedReset{true};
1522 ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1523 ColumnNames_t fOutputBranchNames;
1524 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
1525 // TODO we might be able to unify fBranches, fBranchAddresses and fOutputBranches
1526 std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
1527 std::vector<void *> fBranchAddresses; // Addresses of objects associated to output branches
1528 RBranchSet fOutputBranches;
1529 std::vector<bool> fIsDefine;
1530
1531public:
1532 using ColumnTypes_t = TypeList<ColTypes...>;
1533 SnapshotHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
1534 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
1535 std::vector<bool> &&isDefine)
1536 : fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1537 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fBranches(vbnames.size(), nullptr),
1538 fBranchAddresses(vbnames.size(), nullptr), fIsDefine(std::move(isDefine))
1539 {
1540 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1541 }
1542
1543 SnapshotHelper(const SnapshotHelper &) = delete;
1544 SnapshotHelper(SnapshotHelper &&) = default;
1545 ~SnapshotHelper()
1546 {
1547 if (!fTreeName.empty() /*not moved from*/ && !fOutputFile /* did not run */ && fOptions.fLazy)
1548 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1549 }
1550
1551 void InitTask(TTreeReader *r, unsigned int /* slot */)
1552 {
1553 if (r)
1554 fInputTree = r->GetTree();
1555 fBranchAddressesNeedReset = true;
1556 }
1557
1558 void Exec(unsigned int /* slot */, ColTypes &... values)
1559 {
1560 using ind_t = std::index_sequence_for<ColTypes...>;
1561 if (!fBranchAddressesNeedReset) {
1562 UpdateCArraysPtrs(values..., ind_t{});
1563 } else {
1564 SetBranches(values..., ind_t{});
1565 fBranchAddressesNeedReset = false;
1566 }
1567 fOutputTree->Fill();
1568 }
1569
1570 template <std::size_t... S>
1571 void UpdateCArraysPtrs(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1572 {
1573 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1574 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1575 // leaving associated to the branch of the output tree an invalid pointer.
1576 // With this code, we set the value of the pointer in the output branch anew when needed.
1577 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1578 // we need an int for the expander list.
1579 int expander[] = {(fBranches[S] && fBranchAddresses[S] != GetData(values)
1580 ? fBranches[S]->SetAddress(GetData(values)),
1581 fBranchAddresses[S] = GetData(values), 0 : 0, 0)...,
1582 0};
1583 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1584 }
1585
1586 template <std::size_t... S>
1587 void SetBranches(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1588 {
1589 // create branches in output tree
1590 int expander[] = {(SetBranchesHelper(fInputTree, *fOutputTree, fInputBranchNames[S], fOutputBranchNames[S],
1591 fBranches[S], fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S]),
1592 0)...,
1593 0};
1594 fOutputBranches.AssertNoNullBranchAddresses();
1595 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1596 }
1597
1598 void Initialize()
1599 {
1600 fOutputFile.reset(
1601 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
1603 if(!fOutputFile)
1604 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1605
1606 TDirectory *outputDir = fOutputFile.get();
1607 if (!fDirName.empty()) {
1608 TString checkupdate = fOptions.fMode;
1609 checkupdate.ToLower();
1610 if (checkupdate == "update")
1611 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
1612 else
1613 outputDir = fOutputFile->mkdir(fDirName.c_str());
1614 }
1615
1616 fOutputTree =
1617 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
1618
1619 if (fOptions.fAutoFlush)
1620 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
1621 }
1622
1623 void Finalize()
1624 {
1625 assert(fOutputTree != nullptr);
1626 assert(fOutputFile != nullptr);
1627
1628 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
1629 fOutputTree->AutoSave("flushbaskets");
1630 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
1631 fOutputTree.reset();
1632 fOutputFile->Close();
1633 }
1634
1635 std::string GetActionName() { return "Snapshot"; }
1636
1637 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1638 {
1639 return [this](unsigned int, const RSampleInfo &) mutable { fBranchAddressesNeedReset = true; };
1640 }
1641
1642 /**
1643 * @brief Create a new SnapshotHelper with a different output file name
1644 *
1645 * @param newName A type-erased string with the output file name
1646 * @return SnapshotHelper
1647 *
1648 * This MakeNew implementation is tied to the cloning feature of actions
1649 * of the computation graph. In particular, cloning a Snapshot node usually
1650 * also involves changing the name of the output file, otherwise the cloned
1651 * Snapshot would overwrite the same file.
1652 */
1653 SnapshotHelper MakeNew(void *newName)
1654 {
1655 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
1656 return SnapshotHelper{
1657 finalName, fDirName, fTreeName, fInputBranchNames, fOutputBranchNames, fOptions, std::vector<bool>(fIsDefine)};
1658 }
1659};
1660
1661/// Helper object for a multi-thread Snapshot action
1662template <typename... ColTypes>
1663class R__CLING_PTRCHECK(off) SnapshotHelperMT : public RActionImpl<SnapshotHelperMT<ColTypes...>> {
1664 unsigned int fNSlots;
1665 std::unique_ptr<ROOT::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
1666 std::vector<std::shared_ptr<ROOT::TBufferMergerFile>> fOutputFiles;
1667 std::vector<std::unique_ptr<TTree>> fOutputTrees;
1668 std::vector<int> fBranchAddressesNeedReset; // vector<bool> does not allow concurrent writing of different elements
1669 std::string fFileName; // name of the output file name
1670 std::string fDirName; // name of TFile subdirectory in which output must be written (possibly empty)
1671 std::string fTreeName; // name of output tree
1672 RSnapshotOptions fOptions; // struct holding options to pass down to TFile and TTree in this action
1673 ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1674 ColumnNames_t fOutputBranchNames;
1675 std::vector<TTree *> fInputTrees; // Current input trees. Set at initialization time (`InitTask`)
1676 // Addresses of branches in output per slot, non-null only for the ones holding C arrays
1677 std::vector<std::vector<TBranch *>> fBranches;
1678 // Addresses associated to output branches per slot, non-null only for the ones holding C arrays
1679 std::vector<std::vector<void *>> fBranchAddresses;
1680 std::vector<RBranchSet> fOutputBranches;
1681 std::vector<bool> fIsDefine;
1682
1683public:
1684 using ColumnTypes_t = TypeList<ColTypes...>;
1685 SnapshotHelperMT(const unsigned int nSlots, std::string_view filename, std::string_view dirname,
1686 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
1687 const RSnapshotOptions &options, std::vector<bool> &&isDefine)
1688 : fNSlots(nSlots), fOutputFiles(fNSlots), fOutputTrees(fNSlots), fBranchAddressesNeedReset(fNSlots, 1),
1689 fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1690 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fInputTrees(fNSlots),
1691 fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
1692 fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr)), fOutputBranches(fNSlots),
1693 fIsDefine(std::move(isDefine))
1694 {
1695 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1696 }
1697 SnapshotHelperMT(const SnapshotHelperMT &) = delete;
1698 SnapshotHelperMT(SnapshotHelperMT &&) = default;
1699 ~SnapshotHelperMT()
1700 {
1701 if (!fTreeName.empty() /*not moved from*/ && fOptions.fLazy &&
1702 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &f) { return !f; }) /* never run */)
1703 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1704 }
1705
1706 void InitTask(TTreeReader *r, unsigned int slot)
1707 {
1708 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
1709 if (!fOutputFiles[slot]) {
1710 // first time this thread executes something, let's create a TBufferMerger output directory
1711 fOutputFiles[slot] = fMerger->GetFile();
1712 }
1713 TDirectory *treeDirectory = fOutputFiles[slot].get();
1714 if (!fDirName.empty()) {
1715 // call returnExistingDirectory=true since MT can end up making this call multiple times
1716 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
1717 }
1718 // re-create output tree as we need to create its branches again, with new input variables
1719 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
1720 fOutputTrees[slot] =
1721 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1722 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
1723 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
1724 fOutputTrees[slot]->SetImplicitMT(false);
1725 if (fOptions.fAutoFlush)
1726 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
1727 if (r) {
1728 // not an empty-source RDF
1729 fInputTrees[slot] = r->GetTree();
1730 }
1731 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
1732 }
1733
1734 void FinalizeTask(unsigned int slot)
1735 {
1736 if (fOutputTrees[slot]->GetEntries() > 0)
1737 fOutputFiles[slot]->Write();
1738 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
1739 fOutputTrees[slot].reset(nullptr);
1740 fOutputBranches[slot].Clear();
1741 }
1742
1743 void Exec(unsigned int slot, ColTypes &... values)
1744 {
1745 using ind_t = std::index_sequence_for<ColTypes...>;
1746 if (fBranchAddressesNeedReset[slot] == 0) {
1747 UpdateCArraysPtrs(slot, values..., ind_t{});
1748 } else {
1749 SetBranches(slot, values..., ind_t{});
1750 fBranchAddressesNeedReset[slot] = 0;
1751 }
1752 fOutputTrees[slot]->Fill();
1753 auto entries = fOutputTrees[slot]->GetEntries();
1754 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
1755 if ((autoFlush > 0) && (entries % autoFlush == 0))
1756 fOutputFiles[slot]->Write();
1757 }
1758
1759 template <std::size_t... S>
1760 void UpdateCArraysPtrs(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1761 {
1762 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1763 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1764 // leaving associated to the branch of the output tree an invalid pointer.
1765 // With this code, we set the value of the pointer in the output branch anew when needed.
1766 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1767 // we need an int for the expander list.
1768 int expander[] = {(fBranches[slot][S] && fBranchAddresses[slot][S] != GetData(values)
1769 ? fBranches[slot][S]->SetAddress(GetData(values)),
1770 fBranchAddresses[slot][S] = GetData(values), 0 : 0, 0)...,
1771 0};
1772 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1773 (void)slot; // Also "slot" might be unused, in case "values" is empty
1774 }
1775
1776 template <std::size_t... S>
1777 void SetBranches(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1778 {
1779 // hack to call TTree::Branch on all variadic template arguments
1780 int expander[] = {(SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
1781 fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S],
1782 &values, fOutputBranches[slot], fIsDefine[S]),
1783 0)...,
1784 0};
1785 fOutputBranches[slot].AssertNoNullBranchAddresses();
1786 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1787 }
1788
1789 void Initialize()
1790 {
1791 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
1792 auto out_file = TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/fFileName.c_str(), cs);
1793 if(!out_file)
1794 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1795 fMerger = std::make_unique<ROOT::TBufferMerger>(std::unique_ptr<TFile>(out_file));
1796 }
1797
1798 void Finalize()
1799 {
1800 assert(std::any_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &ptr) { return ptr != nullptr; }));
1801
1802 auto fileWritten = false;
1803 for (auto &file : fOutputFiles) {
1804 if (file) {
1805 file->Write();
1806 file->Close();
1807 fileWritten = true;
1808 }
1809 }
1810
1811 if (!fileWritten) {
1812 Warning("Snapshot",
1813 "No input entries (input TTree was empty or no entry passed the Filters). Output TTree is empty.");
1814 }
1815
1816 // flush all buffers to disk by destroying the TBufferMerger
1817 fOutputFiles.clear();
1818 fMerger.reset();
1819 }
1820
/// Name of this action.
/// \return the string "Snapshot".
std::string GetActionName()
{
   return "Snapshot";
}
1822
1823 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1824 {
1825 return [this](unsigned int slot, const RSampleInfo &) mutable { fBranchAddressesNeedReset[slot] = 1; };
1826 }
1827
1828 /**
1829 * @brief Create a new SnapshotHelperMT with a different output file name
1830 *
1831 * @param newName A type-erased string with the output file name
1832 * @return SnapshotHelperMT
1833 *
1834 * This MakeNew implementation is tied to the cloning feature of actions
1835 * of the computation graph. In particular, cloning a Snapshot node usually
1836 * also involves changing the name of the output file, otherwise the cloned
1837 * Snapshot would overwrite the same file.
1838 */
1839 SnapshotHelperMT MakeNew(void *newName)
1840 {
1841 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
1842 return SnapshotHelperMT{fNSlots, finalName, fDirName, fTreeName,
1843 fInputBranchNames, fOutputBranchNames, fOptions, std::vector<bool>(fIsDefine)};
1844 }
1845};
1846
1847template <typename Acc, typename Merge, typename R, typename T, typename U,
1848 bool MustCopyAssign = std::is_same<R, U>::value>
1849class R__CLING_PTRCHECK(off) AggregateHelper
1850 : public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1851 Acc fAggregate;
1852 Merge fMerge;
1853 std::shared_ptr<U> fResult;
1854 Results<U> fAggregators;
1855
1856public:
1857 using ColumnTypes_t = TypeList<T>;
1858
1859 AggregateHelper(Acc &&f, Merge &&m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1860 : fAggregate(std::move(f)), fMerge(std::move(m)), fResult(result), fAggregators(nSlots, *result)
1861 {
1862 }
1863
1864 AggregateHelper(Acc &f, Merge &m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1865 : fAggregate(f), fMerge(m), fResult(result), fAggregators(nSlots, *result)
1866 {
1867 }
1868
1869 AggregateHelper(AggregateHelper &&) = default;
1870 AggregateHelper(const AggregateHelper &) = delete;
1871
1872 void InitTask(TTreeReader *, unsigned int) {}
1873
1874 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<MustCopyAssign_, int> = 0>
1875 void Exec(unsigned int slot, const T &value)
1876 {
1877 fAggregators[slot] = fAggregate(fAggregators[slot], value);
1878 }
1879
1880 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<!MustCopyAssign_, int> = 0>
1881 void Exec(unsigned int slot, const T &value)
1882 {
1883 fAggregate(fAggregators[slot], value);
1884 }
1885
1886 void Initialize() { /* noop */}
1887
1888 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1889 bool MergeAll = std::is_same<void, MergeRet>::value>
1890 std::enable_if_t<MergeAll, void> Finalize()
1891 {
1892 fMerge(fAggregators);
1893 *fResult = fAggregators[0];
1894 }
1895
1896 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1897 bool MergeTwoByTwo = std::is_same<U, MergeRet>::value>
1898 std::enable_if_t<MergeTwoByTwo, void> Finalize(...) // ... needed to let compiler distinguish overloads
1899 {
1900 for (const auto &acc : fAggregators)
1901 *fResult = fMerge(*fResult, acc);
1902 }
1903
1904 U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; }
1905
1906 std::string GetActionName() { return "Aggregate"; }
1907
1908 AggregateHelper MakeNew(void *newResult)
1909 {
1910 auto &result = *static_cast<std::shared_ptr<U> *>(newResult);
1911 return AggregateHelper(fAggregate, fMerge, result, fAggregators.size());
1912 }
1913};
1914
1915} // end of NS RDF
1916} // end of NS Internal
1917} // end of NS ROOT
1918
1919/// \endcond
1920
1921#endif
PyObject * fCallable
Handle_t Display_t
Display handle.
Definition GuiTypes.h:27
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define g(i)
Definition RSha256.hxx:105
#define h(i)
Definition RSha256.hxx:106
#define R(a, b, c, d, e, f, g, h, i)
Definition RSha256.hxx:110
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
double Double_t
Definition RtypesCore.h:59
unsigned long long ULong64_t
Definition RtypesCore.h:81
#define R__CLING_PTRCHECK(ONOFF)
Definition Rtypes.h:500
#define X(type, name)
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:229
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char x1
char name[80]
Definition TGX11.cxx:110
TClass * IsA() const override
Definition TStringLong.h:20
TTime operator*(const TTime &t1, const TTime &t2)
Definition TTime.h:85
Base class for action helpers, see RInterface::Book() for more information.
pointer data() noexcept
Return a pointer to the vector's buffer, even if empty().
Definition RVec.hxx:280
This class is the textual representation of the content of a columnar dataset.
Definition RDisplay.hxx:65
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1529
A TTree is a list of TBranches.
Definition TBranch.h:93
virtual const char * GetClassName() const
Return the name of the user class whose content is stored in this branch, if any.
Definition TBranch.cxx:1324
virtual char * GetAddress() const
Definition TBranch.h:212
static TClass * Class()
Int_t GetSplitLevel() const
Definition TBranch.h:250
TClass * IsA() const override
Definition TBranch.h:295
virtual Int_t GetBasketSize() const
Definition TBranch.h:217
TObjArray * GetListOfLeaves()
Definition TBranch.h:247
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:28
Collection abstract base class.
Definition TCollection.h:65
TDirectory::TContext keeps track of and restores the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4089
TGraph with asymmetric error bars.
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:669
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:59
A TLeaf describes individual elements of a TBranch. See TBranch structure in TTree.
Definition TLeaf.h:57
A doubly linked list.
Definition TList.h:38
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition TNamed.cxx:164
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
const char * GetTitle() const override
Returns title of object.
Definition TNamed.h:48
TObject * UncheckedAt(Int_t i) const
Definition TObjArray.h:84
Statistical variable, defined by its mean and variance (RMS).
Definition TStatistic.h:33
Basic string class.
Definition TString.h:139
void ToLower()
Change string to lower-case.
Definition TString.cxx:1182
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:44
A TTree represents a columnar dataset.
Definition TTree.h:79
virtual TBranch * FindBranch(const char *name)
Return the branch that corresponds to the path 'branchname', which can include the name of the tree or...
Definition TTree.cxx:4841
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
Definition TTree.cxx:5294
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition TTree.h:353
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition TTree.h:261
RooCmdArg Columns(Int_t ncol)
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
#define F(x, y, z)
#define H(x, y, z)
CPYCPPYY_EXTERN bool Exec(const std::string &cmd)
Definition API.cxx:446
std::unique_ptr< RMergeableVariations< T > > GetMergeableValue(ROOT::RDF::Experimental::RResultMap< T > &rmap)
Retrieve mergeable values after calling ROOT::RDF::VariationsFor .
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition RDFUtils.cxx:314
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition RDFUtils.cxx:259
constexpr std::size_t FindIdxTrue(const T &arr)
Definition Utils.hxx:231
void(off) SmallVectorTemplateBase< T
double T(double x)
std::vector< std::string > ColumnNames_t
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with an RDataFrame computation graph via e....
ROOT type_traits extensions.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
RooArgSet S(Args_t &&... args)
Definition RooArgSet.h:240
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
__device__ AFloat max(AFloat x, AFloat y)
Definition Kernels.cuh:207
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition tmvaglob.cxx:176
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
bool fLazy
Do not start the event loop when Snapshot is called.
int fCompressionLevel
Compression level of output file.
Lightweight storage for a collection of types.
TMarker m
Definition textangle.C:8
TLine l
Definition textangle.C:4
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345