Logo ROOT   6.18/05
Reference Guide
ActionHelpers.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDFOPERATIONS
12#define ROOT_RDFOPERATIONS
13
14#include <algorithm>
15#include <limits>
16#include <memory>
17#include <stdexcept>
18#include <string>
19#include <type_traits>
20#include <vector>
21#include <iomanip>
22
23#include "Compression.h"
25#include "ROOT/RStringView.hxx"
26#include "ROOT/RVec.hxx"
27#include "ROOT/TBufferMerger.hxx" // for SnapshotHelper
29#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/RMakeUnique.hxx"
32#include "ROOT/TypeTraits.hxx"
33#include "ROOT/RDF/RDisplay.hxx"
34#include "RtypesCore.h"
35#include "TBranch.h"
36#include "TClassEdit.h"
37#include "TDirectory.h"
38#include "TFile.h" // for SnapshotHelper
39#include "TH1.h"
40#include "TGraph.h"
41#include "TLeaf.h"
42#include "TObjArray.h"
43#include "TObject.h"
44#include "TTree.h"
45#include "TTreeReader.h" // for SnapshotHelper
46
47/// \cond HIDDEN_SYMBOLS
48
49namespace ROOT {
50namespace Detail {
51namespace RDF {
52template <typename Helper>
53class RActionImpl {
54public:
55 // call Helper::FinalizeTask if present, do nothing otherwise
56 template <typename T = Helper>
57 auto CallFinalizeTask(unsigned int slot) -> decltype(&T::FinalizeTask, void())
58 {
59 static_cast<Helper *>(this)->FinalizeTask(slot);
60 }
61
62 template <typename... Args>
63 void CallFinalizeTask(unsigned int, Args...) {}
64
65};
66
67} // namespace RDF
68} // namespace Detail
69
70namespace Internal {
71namespace RDF {
72using namespace ROOT::TypeTraits;
73using namespace ROOT::VecOps;
74using namespace ROOT::RDF;
75using namespace ROOT::Detail::RDF;
76
77using Hist_t = ::TH1D;
78
79/// The container type for each thread's partial result in an action helper
80// We have to avoid to instantiate std::vector<bool> as that makes it impossible to return a reference to one of
81// the thread-local results. In addition, a common definition for the type of the container makes it easy to swap
82// the type of the underlying container if e.g. we see problems with false sharing of the thread-local results..
83template <typename T>
84using Results = typename std::conditional<std::is_same<T, bool>::value, std::deque<T>, std::vector<T>>::type;
85
86template <typename F>
87class ForeachSlotHelper : public RActionImpl<ForeachSlotHelper<F>> {
88 F fCallable;
89
90public:
91 using ColumnTypes_t = RemoveFirstParameter_t<typename CallableTraits<F>::arg_types>;
92 ForeachSlotHelper(F &&f) : fCallable(f) {}
93 ForeachSlotHelper(ForeachSlotHelper &&) = default;
94 ForeachSlotHelper(const ForeachSlotHelper &) = delete;
95
96 void InitTask(TTreeReader *, unsigned int) {}
97
98 template <typename... Args>
99 void Exec(unsigned int slot, Args &&... args)
100 {
101 // check that the decayed types of Args are the same as the branch types
102 static_assert(std::is_same<TypeList<typename std::decay<Args>::type...>, ColumnTypes_t>::value, "");
103 fCallable(slot, std::forward<Args>(args)...);
104 }
105
106 void Initialize() { /* noop */}
107
108 void Finalize() { /* noop */}
109
110 std::string GetActionName() { return "ForeachSlot"; }
111};
112
113class CountHelper : public RActionImpl<CountHelper> {
114 const std::shared_ptr<ULong64_t> fResultCount;
115 Results<ULong64_t> fCounts;
116
117public:
118 using ColumnTypes_t = TypeList<>;
119 CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots);
120 CountHelper(CountHelper &&) = default;
121 CountHelper(const CountHelper &) = delete;
122 void InitTask(TTreeReader *, unsigned int) {}
123 void Exec(unsigned int slot);
124 void Initialize() { /* noop */}
125 void Finalize();
126 ULong64_t &PartialUpdate(unsigned int slot);
127
128 std::string GetActionName() { return "Count"; }
129};
130
131template <typename ProxiedVal_t>
132class ReportHelper : public RActionImpl<ReportHelper<ProxiedVal_t>> {
133 const std::shared_ptr<RCutFlowReport> fReport;
134 // Here we have a weak pointer since we need to keep track of the validity
135 // of the proxied node. It can happen that the user does not trigger the
136 // event loop by looking into the RResultPtr and the chain goes out of scope
137 // before the Finalize method is invoked.
138 std::weak_ptr<ProxiedVal_t> fProxiedWPtr;
139 bool fReturnEmptyReport;
140
141public:
142 using ColumnTypes_t = TypeList<>;
143 ReportHelper(const std::shared_ptr<RCutFlowReport> &report, const std::shared_ptr<ProxiedVal_t> &pp, bool emptyRep)
144 : fReport(report), fProxiedWPtr(pp), fReturnEmptyReport(emptyRep){};
145 ReportHelper(ReportHelper &&) = default;
146 ReportHelper(const ReportHelper &) = delete;
147 void InitTask(TTreeReader *, unsigned int) {}
148 void Exec(unsigned int /* slot */) {}
149 void Initialize() { /* noop */}
150 void Finalize()
151 {
152 // We need the weak_ptr in order to avoid crashes at tear down
153 if (!fReturnEmptyReport && !fProxiedWPtr.expired())
154 fProxiedWPtr.lock()->Report(*fReport);
155 }
156
157 std::string GetActionName() { return "Report"; }
158};
159
160class FillHelper : public RActionImpl<FillHelper> {
161 // this sets a total initial size of 16 MB for the buffers (can increase)
162 static constexpr unsigned int fgTotalBufSize = 2097152;
163 using BufEl_t = double;
164 using Buf_t = std::vector<BufEl_t>;
165
166 std::vector<Buf_t> fBuffers;
167 std::vector<Buf_t> fWBuffers;
168 const std::shared_ptr<Hist_t> fResultHist;
169 unsigned int fNSlots;
170 unsigned int fBufSize;
171 /// Histograms containing "snapshots" of partial results. Non-null only if a registered callback requires it.
172 Results<std::unique_ptr<Hist_t>> fPartialHists;
173 Buf_t fMin;
174 Buf_t fMax;
175
176 void UpdateMinMax(unsigned int slot, double v);
177
178public:
179 FillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots);
180 FillHelper(FillHelper &&) = default;
181 FillHelper(const FillHelper &) = delete;
182 void InitTask(TTreeReader *, unsigned int) {}
183 void Exec(unsigned int slot, double v);
184 void Exec(unsigned int slot, double v, double w);
185
186 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
187 void Exec(unsigned int slot, const T &vs)
188 {
189 auto &thisBuf = fBuffers[slot];
190 for (auto &v : vs) {
191 UpdateMinMax(slot, v);
192 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
193 }
194 }
195
196 template <typename T, typename W,
197 typename std::enable_if<IsContainer<T>::value && IsContainer<W>::value, int>::type = 0>
198 void Exec(unsigned int slot, const T &vs, const W &ws)
199 {
200 auto &thisBuf = fBuffers[slot];
201
202 for (auto &v : vs) {
203 UpdateMinMax(slot, v);
204 thisBuf.emplace_back(v);
205 }
206
207 auto &thisWBuf = fWBuffers[slot];
208 for (auto &w : ws) {
209 thisWBuf.emplace_back(w); // TODO: Can be optimised in case T == BufEl_t
210 }
211 }
212
213 template <typename T, typename W,
214 typename std::enable_if<IsContainer<T>::value && !IsContainer<W>::value, int>::type = 0>
215 void Exec(unsigned int slot, const T &vs, const W w)
216 {
217 auto &thisBuf = fBuffers[slot];
218 for (auto &v : vs) {
219 UpdateMinMax(slot, v);
220 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
221 }
222
223 auto &thisWBuf = fWBuffers[slot];
224 thisWBuf.insert(thisWBuf.end(), vs.size(), w);
225 }
226
227 // ROOT-10092: Filling with a scalar as first column and a collection as second is not supported
228 template <typename T, typename W,
229 typename std::enable_if<IsContainer<W>::value && !IsContainer<T>::value, int>::type = 0>
230 void Exec(unsigned int, const T &, const W &)
231 {
232 throw std::runtime_error(
233 "Cannot fill object if the type of the first column is a scalar and the one of the second a container.");
234 }
235
236 Hist_t &PartialUpdate(unsigned int);
237
238 void Initialize() { /* noop */}
239
240 void Finalize();
241
242 std::string GetActionName() { return "Fill"; }
243};
244
245extern template void FillHelper::Exec(unsigned int, const std::vector<float> &);
246extern template void FillHelper::Exec(unsigned int, const std::vector<double> &);
247extern template void FillHelper::Exec(unsigned int, const std::vector<char> &);
248extern template void FillHelper::Exec(unsigned int, const std::vector<int> &);
249extern template void FillHelper::Exec(unsigned int, const std::vector<unsigned int> &);
250extern template void FillHelper::Exec(unsigned int, const std::vector<float> &, const std::vector<float> &);
251extern template void FillHelper::Exec(unsigned int, const std::vector<double> &, const std::vector<double> &);
252extern template void FillHelper::Exec(unsigned int, const std::vector<char> &, const std::vector<char> &);
253extern template void FillHelper::Exec(unsigned int, const std::vector<int> &, const std::vector<int> &);
254extern template void
255FillHelper::Exec(unsigned int, const std::vector<unsigned int> &, const std::vector<unsigned int> &);
256
257template <typename HIST = Hist_t>
258class FillParHelper : public RActionImpl<FillParHelper<HIST>> {
259 std::vector<HIST *> fObjects;
260
261public:
262 FillParHelper(FillParHelper &&) = default;
263 FillParHelper(const FillParHelper &) = delete;
264
265 FillParHelper(const std::shared_ptr<HIST> &h, const unsigned int nSlots) : fObjects(nSlots, nullptr)
266 {
267 fObjects[0] = h.get();
268 // Initialise all other slots
269 for (unsigned int i = 1; i < nSlots; ++i) {
270 fObjects[i] = new HIST(*fObjects[0]);
271 if (auto objAsHist = dynamic_cast<TH1*>(fObjects[i])) {
272 objAsHist->SetDirectory(nullptr);
273 }
274 }
275 }
276
277 void InitTask(TTreeReader *, unsigned int) {}
278
279 void Exec(unsigned int slot, double x0) // 1D histos
280 {
281 fObjects[slot]->Fill(x0);
282 }
283
284 void Exec(unsigned int slot, double x0, double x1) // 1D weighted and 2D histos
285 {
286 fObjects[slot]->Fill(x0, x1);
287 }
288
289 void Exec(unsigned int slot, double x0, double x1, double x2) // 2D weighted and 3D histos
290 {
291 fObjects[slot]->Fill(x0, x1, x2);
292 }
293
294 void Exec(unsigned int slot, double x0, double x1, double x2, double x3) // 3D weighted histos
295 {
296 fObjects[slot]->Fill(x0, x1, x2, x3);
297 }
298
299 template <typename X0, typename std::enable_if<IsContainer<X0>::value, int>::type = 0>
300 void Exec(unsigned int slot, const X0 &x0s)
301 {
302 auto thisSlotH = fObjects[slot];
303 for (auto &x0 : x0s) {
304 thisSlotH->Fill(x0); // TODO: Can be optimised in case T == vector<double>
305 }
306 }
307
308 // ROOT-10092: Filling with a scalar as first column and a collection as second is not supported
309 template <typename X0, typename X1,
310 typename std::enable_if<IsContainer<X1>::value && !IsContainer<X0>::value, int>::type = 0>
311 void Exec(unsigned int , const X0 &, const X1 &)
312 {
313 throw std::runtime_error(
314 "Cannot fill object if the type of the first column is a scalar and the one of the second a container.");
315 }
316
317 template <typename X0, typename X1,
318 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value, int>::type = 0>
319 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
320 {
321 auto thisSlotH = fObjects[slot];
322 if (x0s.size() != x1s.size()) {
323 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
324 }
325 auto x0sIt = std::begin(x0s);
326 const auto x0sEnd = std::end(x0s);
327 auto x1sIt = std::begin(x1s);
328 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
329 thisSlotH->Fill(*x0sIt, *x1sIt); // TODO: Can be optimised in case T == vector<double>
330 }
331 }
332
333 template <typename X0, typename W,
334 typename std::enable_if<IsContainer<X0>::value && !IsContainer<W>::value, int>::type = 0>
335 void Exec(unsigned int slot, const X0 &x0s, const W w)
336 {
337 auto thisSlotH = fObjects[slot];
338 for (auto &&x : x0s) {
339 thisSlotH->Fill(x, w);
340 }
341 }
342
343 template <typename X0, typename X1, typename X2,
344 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value,
345 int>::type = 0>
346 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s)
347 {
348 auto thisSlotH = fObjects[slot];
349 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size())) {
350 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
351 }
352 auto x0sIt = std::begin(x0s);
353 const auto x0sEnd = std::end(x0s);
354 auto x1sIt = std::begin(x1s);
355 auto x2sIt = std::begin(x2s);
356 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++) {
357 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt); // TODO: Can be optimised in case T == vector<double>
358 }
359 }
360
361 template <typename X0, typename X1, typename W,
362 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && !IsContainer<W>::value,
363 int>::type = 0>
364 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const W w)
365 {
366 auto thisSlotH = fObjects[slot];
367 if (x0s.size() != x1s.size()) {
368 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
369 }
370 auto x0sIt = std::begin(x0s);
371 const auto x0sEnd = std::end(x0s);
372 auto x1sIt = std::begin(x1s);
373 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
374 thisSlotH->Fill(*x0sIt, *x1sIt, w); // TODO: Can be optimised in case T == vector<double>
375 }
376 }
377
378 template <typename X0, typename X1, typename X2, typename X3,
379 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value &&
381 int>::type = 0>
382 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s, const X3 &x3s)
383 {
384 auto thisSlotH = fObjects[slot];
385 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size() && x1s.size() == x3s.size())) {
386 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
387 }
388 auto x0sIt = std::begin(x0s);
389 const auto x0sEnd = std::end(x0s);
390 auto x1sIt = std::begin(x1s);
391 auto x2sIt = std::begin(x2s);
392 auto x3sIt = std::begin(x3s);
393 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++, x3sIt++) {
394 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt, *x3sIt); // TODO: Can be optimised in case T == vector<double>
395 }
396 }
397
398 template <typename X0, typename X1, typename X2, typename W,
399 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value &&
401 int>::type = 0>
402 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s, const W w)
403 {
404 auto thisSlotH = fObjects[slot];
405 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size())) {
406 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
407 }
408 auto x0sIt = std::begin(x0s);
409 const auto x0sEnd = std::end(x0s);
410 auto x1sIt = std::begin(x1s);
411 auto x2sIt = std::begin(x2s);
412 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++) {
413 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt, w);
414 }
415 }
416
417 void Initialize() { /* noop */}
418
419 void Finalize()
420 {
421 auto resObj = fObjects[0];
422 const auto nSlots = fObjects.size();
423 TList l;
424 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
425 for (unsigned int slot = 1; slot < nSlots; ++slot) {
426 l.Add(fObjects[slot]);
427 }
428
429 resObj->Merge(&l);
430 }
431
432 HIST &PartialUpdate(unsigned int slot) { return *fObjects[slot]; }
433
434 std::string GetActionName() { return "FillPar"; }
435};
436
437class FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> {
438public:
439 using Result_t = ::TGraph;
440
441private:
442 std::vector<::TGraph *> fGraphs;
443
444public:
445 FillTGraphHelper(FillTGraphHelper &&) = default;
446 FillTGraphHelper(const FillTGraphHelper &) = delete;
447
448 // The last parameter is always false, as at the moment there is no way to propagate the parameter from the user to
449 // this method
450 FillTGraphHelper(const std::shared_ptr<::TGraph> &g, const unsigned int nSlots) : fGraphs(nSlots, nullptr)
451 {
452 fGraphs[0] = g.get();
453 // Initialise all other slots
454 for (unsigned int i = 1; i < nSlots; ++i) {
455 fGraphs[i] = new TGraph(*fGraphs[0]);
456 }
457 }
458
459 void Initialize() {}
460 void InitTask(TTreeReader *, unsigned int) {}
461
462 template <typename X0, typename X1,
463 typename std::enable_if<
465 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
466 {
467 if (x0s.size() != x1s.size()) {
468 throw std::runtime_error("Cannot fill Graph with values in containers of different sizes.");
469 }
470 auto thisSlotG = fGraphs[slot];
471 auto x0sIt = std::begin(x0s);
472 const auto x0sEnd = std::end(x0s);
473 auto x1sIt = std::begin(x1s);
474 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
475 thisSlotG->SetPoint(thisSlotG->GetN(), *x0sIt, *x1sIt);
476 }
477 }
478
479 template <typename X0, typename X1>
480 void Exec(unsigned int slot, X0 x0, X1 x1)
481 {
482 auto thisSlotG = fGraphs[slot];
483 thisSlotG->SetPoint(thisSlotG->GetN(), x0, x1);
484 }
485
486 void Finalize()
487 {
488 const auto nSlots = fGraphs.size();
489 auto resGraph = fGraphs[0];
490 TList l;
491 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
492 for (unsigned int slot = 1; slot < nSlots; ++slot) {
493 l.Add(fGraphs[slot]);
494 }
495 resGraph->Merge(&l);
496 }
497
498 std::string GetActionName() { return "Graph"; }
499
500 Result_t &PartialUpdate(unsigned int slot) { return *fGraphs[slot]; }
501};
502
503// In case of the take helper we have 4 cases:
504// 1. The column is not an RVec, the collection is not a vector
505// 2. The column is not an RVec, the collection is a vector
506// 3. The column is an RVec, the collection is not a vector
507// 4. The column is an RVec, the collection is a vector
508
509template <typename V, typename COLL>
510void FillColl(V&& v, COLL& c) {
511 c.emplace_back(v);
512}
513
514// Use push_back for bool since some compilers do not support emplace_back.
515template <typename COLL>
516void FillColl(bool v, COLL& c) {
517 c.push_back(v);
518}
519
520// Case 1.: The column is not an RVec, the collection is not a vector
521// No optimisations, no transformations: just copies.
522template <typename RealT_t, typename T, typename COLL>
523class TakeHelper : public RActionImpl<TakeHelper<RealT_t, T, COLL>> {
524 Results<std::shared_ptr<COLL>> fColls;
525
526public:
527 using ColumnTypes_t = TypeList<T>;
528 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
529 {
530 fColls.emplace_back(resultColl);
531 for (unsigned int i = 1; i < nSlots; ++i)
532 fColls.emplace_back(std::make_shared<COLL>());
533 }
534 TakeHelper(TakeHelper &&);
535 TakeHelper(const TakeHelper &) = delete;
536
537 void InitTask(TTreeReader *, unsigned int) {}
538
539 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
540
541 void Initialize() { /* noop */}
542
543 void Finalize()
544 {
545 auto rColl = fColls[0];
546 for (unsigned int i = 1; i < fColls.size(); ++i) {
547 const auto &coll = fColls[i];
548 const auto end = coll->end();
549 // Use an explicit loop here to prevent compiler warnings introduced by
550 // clang's range-based loop analysis and vector<bool> references.
551 for (auto j = coll->begin(); j != end; j++) {
552 FillColl(*j, *rColl);
553 }
554 }
555 }
556
557 COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); }
558
559 std::string GetActionName() { return "Take"; }
560};
561
562// Case 2.: The column is not an RVec, the collection is a vector
563// Optimisations, no transformations: just copies.
564template <typename RealT_t, typename T>
565class TakeHelper<RealT_t, T, std::vector<T>> : public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
566 Results<std::shared_ptr<std::vector<T>>> fColls;
567
568public:
569 using ColumnTypes_t = TypeList<T>;
570 TakeHelper(const std::shared_ptr<std::vector<T>> &resultColl, const unsigned int nSlots)
571 {
572 fColls.emplace_back(resultColl);
573 for (unsigned int i = 1; i < nSlots; ++i) {
574 auto v = std::make_shared<std::vector<T>>();
575 v->reserve(1024);
576 fColls.emplace_back(v);
577 }
578 }
579 TakeHelper(TakeHelper &&);
580 TakeHelper(const TakeHelper &) = delete;
581
582 void InitTask(TTreeReader *, unsigned int) {}
583
584 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
585
586 void Initialize() { /* noop */}
587
588 // This is optimised to treat vectors
589 void Finalize()
590 {
591 ULong64_t totSize = 0;
592 for (auto &coll : fColls)
593 totSize += coll->size();
594 auto rColl = fColls[0];
595 rColl->reserve(totSize);
596 for (unsigned int i = 1; i < fColls.size(); ++i) {
597 auto &coll = fColls[i];
598 rColl->insert(rColl->end(), coll->begin(), coll->end());
599 }
600 }
601
602 std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; }
603
604 std::string GetActionName() { return "Take"; }
605};
606
607// Case 3.: The column is a RVec, the collection is not a vector
608// No optimisations, transformations from RVecs to vectors
609template <typename RealT_t, typename COLL>
610class TakeHelper<RealT_t, RVec<RealT_t>, COLL> : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
611 Results<std::shared_ptr<COLL>> fColls;
612
613public:
614 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
615 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
616 {
617 fColls.emplace_back(resultColl);
618 for (unsigned int i = 1; i < nSlots; ++i)
619 fColls.emplace_back(std::make_shared<COLL>());
620 }
621 TakeHelper(TakeHelper &&);
622 TakeHelper(const TakeHelper &) = delete;
623
624 void InitTask(TTreeReader *, unsigned int) {}
625
626 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
627
628 void Initialize() { /* noop */}
629
630 void Finalize()
631 {
632 auto rColl = fColls[0];
633 for (unsigned int i = 1; i < fColls.size(); ++i) {
634 auto &coll = fColls[i];
635 for (auto &v : *coll) {
636 rColl->emplace_back(v);
637 }
638 }
639 }
640
641 std::string GetActionName() { return "Take"; }
642};
643
644// Case 4.: The column is an RVec, the collection is a vector
645// Optimisations, transformations from RVecs to vectors
646template <typename RealT_t>
647class TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>
648 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
649
650 Results<std::shared_ptr<std::vector<std::vector<RealT_t>>>> fColls;
651
652public:
653 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
654 TakeHelper(const std::shared_ptr<std::vector<std::vector<RealT_t>>> &resultColl, const unsigned int nSlots)
655 {
656 fColls.emplace_back(resultColl);
657 for (unsigned int i = 1; i < nSlots; ++i) {
658 auto v = std::make_shared<std::vector<RealT_t>>();
659 v->reserve(1024);
660 fColls.emplace_back(v);
661 }
662 }
663 TakeHelper(TakeHelper &&);
664 TakeHelper(const TakeHelper &) = delete;
665
666 void InitTask(TTreeReader *, unsigned int) {}
667
668 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
669
670 void Initialize() { /* noop */}
671
672 // This is optimised to treat vectors
673 void Finalize()
674 {
675 ULong64_t totSize = 0;
676 for (auto &coll : fColls)
677 totSize += coll->size();
678 auto rColl = fColls[0];
679 rColl->reserve(totSize);
680 for (unsigned int i = 1; i < fColls.size(); ++i) {
681 auto &coll = fColls[i];
682 rColl->insert(rColl->end(), coll->begin(), coll->end());
683 }
684 }
685
686 std::string GetActionName() { return "Take"; }
687};
688
689// Extern templates for TakeHelper
690// NOTE: The move-constructor of specializations declared as extern templates
691// must be defined out of line, otherwise cling fails to find its symbol.
692template <typename RealT_t, typename T, typename COLL>
693TakeHelper<RealT_t, T, COLL>::TakeHelper(TakeHelper<RealT_t, T, COLL> &&) = default;
694template <typename RealT_t, typename T>
695TakeHelper<RealT_t, T, std::vector<T>>::TakeHelper(TakeHelper<RealT_t, T, std::vector<T>> &&) = default;
696template <typename RealT_t, typename COLL>
697TakeHelper<RealT_t, RVec<RealT_t>, COLL>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, COLL> &&) = default;
698template <typename RealT_t>
699TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>> &&) = default;
700
701// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
702#if __GNUC__ > 5
703extern template class TakeHelper<bool, bool, std::vector<bool>>;
704extern template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
705extern template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
706extern template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
707extern template class TakeHelper<int, int, std::vector<int>>;
708extern template class TakeHelper<long, long, std::vector<long>>;
709extern template class TakeHelper<long long, long long, std::vector<long long>>;
710extern template class TakeHelper<float, float, std::vector<float>>;
711extern template class TakeHelper<double, double, std::vector<double>>;
712#endif
713
714
715template <typename ResultType>
716class MinHelper : public RActionImpl<MinHelper<ResultType>> {
717 const std::shared_ptr<ResultType> fResultMin;
718 Results<ResultType> fMins;
719
720public:
721 MinHelper(MinHelper &&) = default;
722 MinHelper(const std::shared_ptr<ResultType> &minVPtr, const unsigned int nSlots)
723 : fResultMin(minVPtr), fMins(nSlots, std::numeric_limits<ResultType>::max())
724 {
725 }
726
727 void Exec(unsigned int slot, ResultType v) { fMins[slot] = std::min(v, fMins[slot]); }
728
729 void InitTask(TTreeReader *, unsigned int) {}
730
731 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
732 void Exec(unsigned int slot, const T &vs)
733 {
734 for (auto &&v : vs)
735 fMins[slot] = std::min(v, fMins[slot]);
736 }
737
738 void Initialize() { /* noop */}
739
740 void Finalize()
741 {
742 *fResultMin = std::numeric_limits<ResultType>::max();
743 for (auto &m : fMins)
744 *fResultMin = std::min(m, *fResultMin);
745 }
746
747 ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; }
748
749 std::string GetActionName() { return "Min"; }
750};
751
752// TODO
753// extern template void MinHelper::Exec(unsigned int, const std::vector<float> &);
754// extern template void MinHelper::Exec(unsigned int, const std::vector<double> &);
755// extern template void MinHelper::Exec(unsigned int, const std::vector<char> &);
756// extern template void MinHelper::Exec(unsigned int, const std::vector<int> &);
757// extern template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
758
759template <typename ResultType>
760class MaxHelper : public RActionImpl<MaxHelper<ResultType>> {
761 const std::shared_ptr<ResultType> fResultMax;
762 Results<ResultType> fMaxs;
763
764public:
765 MaxHelper(MaxHelper &&) = default;
766 MaxHelper(const MaxHelper &) = delete;
767 MaxHelper(const std::shared_ptr<ResultType> &maxVPtr, const unsigned int nSlots)
768 : fResultMax(maxVPtr), fMaxs(nSlots, std::numeric_limits<ResultType>::lowest())
769 {
770 }
771
772 void InitTask(TTreeReader *, unsigned int) {}
773 void Exec(unsigned int slot, ResultType v) { fMaxs[slot] = std::max(v, fMaxs[slot]); }
774
775 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
776 void Exec(unsigned int slot, const T &vs)
777 {
778 for (auto &&v : vs)
779 fMaxs[slot] = std::max((ResultType)v, fMaxs[slot]);
780 }
781
782 void Initialize() { /* noop */}
783
784 void Finalize()
785 {
786 *fResultMax = std::numeric_limits<ResultType>::lowest();
787 for (auto &m : fMaxs) {
788 *fResultMax = std::max(m, *fResultMax);
789 }
790 }
791
792 ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; }
793
794 std::string GetActionName() { return "Max"; }
795};
796
797// TODO
798// extern template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
799// extern template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
800// extern template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
801// extern template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
802// extern template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
803
804template <typename ResultType>
805class SumHelper : public RActionImpl<SumHelper<ResultType>> {
806 const std::shared_ptr<ResultType> fResultSum;
807 Results<ResultType> fSums;
808
809 /// Evaluate neutral element for this type and the sum operation.
810 /// This is assumed to be any_value - any_value if operator- is defined
811 /// for the type, otherwise a default-constructed ResultType{} is used.
812 template <typename T = ResultType>
813 auto NeutralElement(const T &v, int /*overloadresolver*/) -> decltype(v - v)
814 {
815 return v - v;
816 }
817
818 template <typename T = ResultType, typename Dummy = int>
819 ResultType NeutralElement(const T &, Dummy) // this overload has lower priority thanks to the template arg
820 {
821 return ResultType{};
822 }
823
824public:
825 SumHelper(SumHelper &&) = default;
826 SumHelper(const SumHelper &) = delete;
827 SumHelper(const std::shared_ptr<ResultType> &sumVPtr, const unsigned int nSlots)
828 : fResultSum(sumVPtr), fSums(nSlots, NeutralElement(*sumVPtr, -1))
829 {
830 }
831
832 void InitTask(TTreeReader *, unsigned int) {}
833 void Exec(unsigned int slot, ResultType v) { fSums[slot] += v; }
834
835 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
836 void Exec(unsigned int slot, const T &vs)
837 {
838 for (auto &&v : vs)
839 fSums[slot] += static_cast<ResultType>(v);
840 }
841
842 void Initialize() { /* noop */}
843
844 void Finalize()
845 {
846 for (auto &m : fSums)
847 *fResultSum += m;
848 }
849
850 ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; }
851
852 std::string GetActionName() { return "Sum"; }
853};
854
855class MeanHelper : public RActionImpl<MeanHelper> {
856 const std::shared_ptr<double> fResultMean;
857 std::vector<ULong64_t> fCounts;
858 std::vector<double> fSums;
859 std::vector<double> fPartialMeans;
860
861public:
862 MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
863 MeanHelper(MeanHelper &&) = default;
864 MeanHelper(const MeanHelper &) = delete;
865 void InitTask(TTreeReader *, unsigned int) {}
866 void Exec(unsigned int slot, double v);
867
868 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
869 void Exec(unsigned int slot, const T &vs)
870 {
871 for (auto &&v : vs) {
872 fSums[slot] += v;
873 fCounts[slot]++;
874 }
875 }
876
877 void Initialize() { /* noop */}
878
879 void Finalize();
880
881 double &PartialUpdate(unsigned int slot);
882
883 std::string GetActionName() { return "Mean"; }
884};
885
886extern template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
887extern template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
888extern template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
889extern template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
890extern template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
891
892class StdDevHelper : public RActionImpl<StdDevHelper> {
893 // Number of subsets of data
894 const unsigned int fNSlots;
895 const std::shared_ptr<double> fResultStdDev;
896 // Number of element for each slot
897 std::vector<ULong64_t> fCounts;
898 // Mean of each slot
899 std::vector<double> fMeans;
900 // Squared distance from the mean
901 std::vector<double> fDistancesfromMean;
902
903public:
904 StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
905 StdDevHelper(StdDevHelper &&) = default;
906 StdDevHelper(const StdDevHelper &) = delete;
907 void InitTask(TTreeReader *, unsigned int) {}
908 void Exec(unsigned int slot, double v);
909
910 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
911 void Exec(unsigned int slot, const T &vs)
912 {
913 for (auto &&v : vs) {
914 Exec(slot, v);
915 }
916 }
917
918 void Initialize() { /* noop */}
919
920 void Finalize();
921
922 std::string GetActionName() { return "StdDev"; }
923};
924
925extern template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
926extern template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
927extern template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
928extern template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
929extern template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
930
931template <typename PrevNodeType>
932class DisplayHelper : public RActionImpl<DisplayHelper<PrevNodeType>> {
933private:
935 const std::shared_ptr<Display_t> fDisplayerHelper;
936 const std::shared_ptr<PrevNodeType> fPrevNode;
937
938public:
939 DisplayHelper(const std::shared_ptr<Display_t> &d, const std::shared_ptr<PrevNodeType> &prevNode)
940 : fDisplayerHelper(d), fPrevNode(prevNode)
941 {
942 }
943 DisplayHelper(DisplayHelper &&) = default;
944 DisplayHelper(const DisplayHelper &) = delete;
945 void InitTask(TTreeReader *, unsigned int) {}
946
947 template <typename... Columns>
948 void Exec(unsigned int, Columns... columns)
949 {
950 fDisplayerHelper->AddRow(columns...);
951 if (!fDisplayerHelper->HasNext()) {
952 fPrevNode->StopProcessing();
953 }
954 }
955
956 void Initialize() {}
957
958 void Finalize() {}
959
960 std::string GetActionName() { return "Display"; }
961};
962
963// std::vector<bool> is special, and not in a good way. As a consequence Snapshot of RVec<bool> needs to be treated
964// specially. In particular, if RVec<bool> is filled with a (fixed or variable size) boolean array coming from
965// a ROOT file, when writing out the correspinding branch from a Snapshot we do not have an address to set for the
966// TTree branch (std::vector<bool> and, consequently, RVec<bool> do not provide a `data()` method).
967// Bools is a lightweight wrapper around a C array of booleans that is meant to provide a stable address for the
968// output TTree to read the contents of the snapshotted branches at Fill time.
969class BoolArray {
970 std::size_t fSize = 0;
971 bool *fBools = nullptr;
972
973 bool *CopyVector(const RVec<bool> &v)
974 {
975 auto b = new bool[fSize];
976 std::copy(v.begin(), v.end(), b);
977 return b;
978 }
979
980 bool *CopyArray(bool *o, std::size_t size)
981 {
982 auto b = new bool[size];
983 for (auto i = 0u; i < size; ++i)
984 b[i] = o[i];
985 return b;
986 }
987
988public:
989 // this generic constructor could be replaced with a constexpr if in SetBranchesHelper
990 BoolArray() = default;
991 template <typename T>
992 BoolArray(const T &) { throw std::runtime_error("This constructor should never be called"); }
993 BoolArray(const RVec<bool> &v) : fSize(v.size()), fBools(CopyVector(v)) {}
994 BoolArray(const BoolArray &b)
995 {
996 CopyArray(b.fBools, b.fSize);
997 }
998 BoolArray &operator=(const BoolArray &b)
999 {
1000 delete[] fBools;
1001 CopyArray(b.fBools, b.fSize);
1002 return *this;
1003 }
1004 BoolArray(BoolArray &&b)
1005 {
1006 fSize = b.fSize;
1007 fBools = b.fBools;
1008 b.fSize = 0;
1009 b.fBools = nullptr;
1010 }
1011 BoolArray &operator=(BoolArray &&b)
1012 {
1013 delete[] fBools;
1014 fSize = b.fSize;
1015 fBools = b.fBools;
1016 b.fSize = 0;
1017 b.fBools = nullptr;
1018 return *this;
1019 }
1020 ~BoolArray() { delete[] fBools; }
1021 std::size_t Size() const { return fSize; }
1022 bool *Data() { return fBools; }
1023};
1024using BoolArrayMap = std::map<std::string, BoolArray>;
1025
1026inline bool *UpdateBoolArrayIfBool(BoolArrayMap &boolArrays, RVec<bool> &v, const std::string &outName)
1027{
1028 // create a boolArrays entry
1029 boolArrays[outName] = BoolArray(v);
1030 return boolArrays[outName].Data();
1031}
1032
1033template <typename T>
1034T *UpdateBoolArrayIfBool(BoolArrayMap &, RVec<T> &v, const std::string &)
1035{
1036 return v.data();
1037}
1038
1039// Helper which gets the return value of the data() method if the type is an
1040// RVec (of anything but a bool), nullptr otherwise.
1041inline void *GetData(ROOT::VecOps::RVec<bool> & /*v*/)
1042{
1043 return nullptr;
1044}
1045
1046template <typename T>
1047void *GetData(ROOT::VecOps::RVec<T> &v)
1048{
1049 return v.data();
1050}
1051
1052template <typename T>
1053void *GetData(T & /*v*/)
1054{
1055 return nullptr;
1056}
1057
1058
1059template <typename T>
1060void SetBranchesHelper(BoolArrayMap &, TTree * /*inputTree*/, TTree &outputTree, const std::string & /*validName*/,
1061 const std::string &name, TBranch *& branch, void *& branchAddress, T *address)
1062{
1063 outputTree.Branch(name.c_str(), address);
1064 branch = nullptr;
1065 branchAddress = nullptr;
1066}
1067
1068/// Helper function for SnapshotHelper and SnapshotHelperMT. It creates new branches for the output TTree of a Snapshot.
1069/// This overload is called for columns of type `RVec<T>`. For RDF, these can represent:
1070/// 1. c-style arrays in ROOT files, so we are sure that there are input trees to which we can ask the correct branch title
1071/// 2. RVecs coming from a custom column or a source
1072/// 3. vectors coming from ROOT files
1073/// In case of 1., we save the pointer to the branch and the pointer to the input value. In case of 2. and 3. we save
1074/// nullptrs.
1075template <typename T>
1076void SetBranchesHelper(BoolArrayMap &boolArrays, TTree *inputTree, TTree &outputTree, const std::string &inName,
1077 const std::string &outName, TBranch *&branch, void *&branchAddress, RVec<T> *ab)
1078{
1079 auto *const inputBranch = inputTree ? inputTree->GetBranch(inName.c_str()) : nullptr;
1080 const auto mustWriteStdVec =
1081 !inputBranch || ROOT::ESTLType::kSTLvector == TClassEdit::IsSTLCont(inputBranch->GetClassName());
1082
1083 if (mustWriteStdVec) {
1084 // Treat 2. and 3.:
1085 // 2. RVec coming from a custom column or a source
1086 // 3. RVec coming from a column on disk of type vector (the RVec is adopting the data of that vector)
1087 outputTree.Branch(outName.c_str(), &ab->AsVector());
1088 return;
1089 }
1090
1091 // Treat 1, the C-array case
1092 auto *const leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
1093 const auto bname = leaf->GetName();
1094 const auto counterStr =
1095 leaf->GetLeafCount() ? std::string(leaf->GetLeafCount()->GetName()) : std::to_string(leaf->GetLenStatic());
1096 const auto btype = leaf->GetTypeName();
1097 const auto rootbtype = TypeName2ROOTTypeName(btype);
1098 const auto leaflist = std::string(bname) + "[" + counterStr + "]/" + rootbtype;
1099
1100 /// RVec<bool> is special because std::vector<bool> is special. In particular, it has no `data()`,
1101 /// so we need to explicitly manage storage of the data that the tree needs to Fill branches with.
1102 auto dataPtr = UpdateBoolArrayIfBool(boolArrays, *ab, outName);
1103
1104 auto *const outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str());
1105 outputBranch->SetTitle(inputBranch->GetTitle());
1106
1107 // Record the branch ptr and the address associated to it if this is not a bool array
1108 if (!std::is_same<bool, T>::value) {
1109 branch = outputBranch;
1110 branchAddress = GetData(*ab);
1111 }
1112}
1113
1114// generic version, no-op
1115template <typename T>
1116void UpdateBoolArray(BoolArrayMap &, T&, const std::string &, TTree &) {}
1117
1118// RVec<bool> overload, update boolArrays if needed
1119inline void UpdateBoolArray(BoolArrayMap &boolArrays, RVec<bool> &v, const std::string &outName, TTree &t)
1120{
1121 if (v.size() > boolArrays[outName].Size()) {
1122 boolArrays[outName] = BoolArray(v); // resize and copy
1123 t.SetBranchAddress(outName.c_str(), boolArrays[outName].Data());
1124 }
1125 else {
1126 std::copy(v.begin(), v.end(), boolArrays[outName].Data()); // just copy
1127 }
1128}
1129
1130/// Helper object for a single-thread Snapshot action
1131template <typename... BranchTypes>
1132class SnapshotHelper : public RActionImpl<SnapshotHelper<BranchTypes...>> {
1133 const std::string fFileName;
1134 const std::string fDirName;
1135 const std::string fTreeName;
1136 const RSnapshotOptions fOptions;
1137 std::unique_ptr<TFile> fOutputFile;
1138 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
1139 bool fIsFirstEvent{true};
1140 const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1141 const ColumnNames_t fOutputBranchNames;
1142 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
1143 BoolArrayMap fBoolArrays; // Storage for C arrays of bools to be written out
1144 std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
1145 std::vector<void *> fBranchAddresses; // Addresses associated to output branches, non-null only for the ones holding C arrays
1146
1147public:
1148 using ColumnTypes_t = TypeList<BranchTypes...>;
1149 SnapshotHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
1150 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options)
1151 : fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1152 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fBranches(vbnames.size(), nullptr),
1153 fBranchAddresses(vbnames.size(), nullptr)
1154 {
1155 }
1156
1157 SnapshotHelper(const SnapshotHelper &) = delete;
1158 SnapshotHelper(SnapshotHelper &&) = default;
1159
1160 void InitTask(TTreeReader *r, unsigned int /* slot */)
1161 {
1162 if (!r) // empty source, nothing to do
1163 return;
1164 fInputTree = r->GetTree();
1165 // AddClone guarantees that if the input file changes the branches of the output tree are updated with the new
1166 // addresses of the branch values
1167 fInputTree->AddClone(fOutputTree.get());
1168 }
1169
1170 void Exec(unsigned int /* slot */, BranchTypes &... values)
1171 {
1172 using ind_t = std::index_sequence_for<BranchTypes...>;
1173 if (! fIsFirstEvent) {
1174 UpdateCArraysPtrs(values..., ind_t{});
1175 } else {
1176 SetBranches(values..., ind_t{});
1177 fIsFirstEvent = false;
1178 }
1179 UpdateBoolArrays(values..., ind_t{});
1180 fOutputTree->Fill();
1181 }
1182
1183 template <std::size_t... S>
1184 void UpdateCArraysPtrs(BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1185 {
1186 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1187 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1188 // leaving associated to the branch of the output tree an invalid pointer.
1189 // With this code, we set the value of the pointer in the output branch anew when needed.
1190 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1191 // we need an int for the expander list.
1192 int expander[] = {(fBranches[S] && fBranchAddresses[S] != GetData(values)
1193 ? fBranches[S]->SetAddress(GetData(values)),
1194 fBranchAddresses[S] = GetData(values), 0 : 0, 0)...,
1195 0};
1196 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1197 }
1198
1199 template <std::size_t... S>
1200 void SetBranches(BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1201 {
1202 // create branches in output tree (and fill fBoolArrays for RVec<bool> columns)
1203 int expander[] = {(SetBranchesHelper(fBoolArrays, fInputTree, *fOutputTree, fInputBranchNames[S],
1204 fOutputBranchNames[S], fBranches[S], fBranchAddresses[S], &values),
1205 0)...,
1206 0};
1207 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1208 }
1209
1210 template <std::size_t... S>
1211 void UpdateBoolArrays(BranchTypes &...values, std::index_sequence<S...> /*dummy*/)
1212 {
1213 int expander[] = {(UpdateBoolArray(fBoolArrays, values, fOutputBranchNames[S], *fOutputTree), 0)..., 0};
1214 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1215 }
1216
1217 void Initialize()
1218 {
1219 fOutputFile.reset(
1220 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
1222
1223 if (!fDirName.empty()) {
1224 fOutputFile->mkdir(fDirName.c_str());
1225 fOutputFile->cd(fDirName.c_str());
1226 }
1227
1228 fOutputTree =
1229 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/fOutputFile.get());
1230
1231 if (fOptions.fAutoFlush)
1232 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
1233 }
1234
1235 void Finalize()
1236 {
1237 if (fOutputFile && fOutputTree) {
1238 ::TDirectory::TContext ctxt(fOutputFile->GetDirectory(fDirName.c_str()));
1239 fOutputTree->Write();
1240 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
1241 fOutputTree.reset();
1242 fOutputFile->Close();
1243 } else {
1244 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1245 }
1246 }
1247
1248 std::string GetActionName() { return "Snapshot"; }
1249};
1250
1251/// Helper object for a multi-thread Snapshot action
1252template <typename... BranchTypes>
1253class SnapshotHelperMT : public RActionImpl<SnapshotHelperMT<BranchTypes...>> {
1254 const unsigned int fNSlots;
1255 std::unique_ptr<ROOT::Experimental::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
1256 std::vector<std::shared_ptr<ROOT::Experimental::TBufferMergerFile>> fOutputFiles;
1257 std::vector<std::unique_ptr<TTree>> fOutputTrees;
1258 std::vector<int> fIsFirstEvent; // vector<bool> does not allow concurrent writing of different elements
1259 const std::string fFileName; // name of the output file name
1260 const std::string fDirName; // name of TFile subdirectory in which output must be written (possibly empty)
1261 const std::string fTreeName; // name of output tree
1262 const RSnapshotOptions fOptions; // struct holding options to pass down to TFile and TTree in this action
1263 const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1264 const ColumnNames_t fOutputBranchNames;
1265 std::vector<TTree *> fInputTrees; // Current input trees. Set at initialization time (`InitTask`)
1266 std::vector<BoolArrayMap> fBoolArrays; // Per-thread storage for C arrays of bools to be written out
1267 // Addresses of branches in output per slot, non-null only for the ones holding C arrays
1268 std::vector<std::vector<TBranch *>> fBranches;
1269 // Addresses associated to output branches per slot, non-null only for the ones holding C arrays
1270 std::vector<std::vector<void *>> fBranchAddresses;
1271
1272public:
1273 using ColumnTypes_t = TypeList<BranchTypes...>;
1274 SnapshotHelperMT(const unsigned int nSlots, std::string_view filename, std::string_view dirname,
1275 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
1276 const RSnapshotOptions &options)
1277 : fNSlots(nSlots), fOutputFiles(fNSlots), fOutputTrees(fNSlots), fIsFirstEvent(fNSlots, 1), fFileName(filename),
1278 fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1279 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fInputTrees(fNSlots), fBoolArrays(fNSlots),
1280 fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
1281 fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr))
1282 {
1283 }
1284 SnapshotHelperMT(const SnapshotHelperMT &) = delete;
1285 SnapshotHelperMT(SnapshotHelperMT &&) = default;
1286
1287 void InitTask(TTreeReader *r, unsigned int slot)
1288 {
1289 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
1290 if (!fOutputFiles[slot]) {
1291 // first time this thread executes something, let's create a TBufferMerger output directory
1292 fOutputFiles[slot] = fMerger->GetFile();
1293 }
1294 TDirectory *treeDirectory = fOutputFiles[slot].get();
1295 if (!fDirName.empty()) {
1296 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str());
1297 }
1298 // re-create output tree as we need to create its branches again, with new input variables
1299 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
1300 fOutputTrees[slot] =
1301 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1302 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
1303 fOutputTrees[slot]->SetImplicitMT(false);
1304 if (fOptions.fAutoFlush)
1305 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
1306 if (r) {
1307 // not an empty-source RDF
1308 fInputTrees[slot] = r->GetTree();
1309 // AddClone guarantees that if the input file changes the branches of the output tree are updated with the new
1310 // addresses of the branch values. We need this in case of friend trees with different cluster granularity
1311 // than the main tree.
1312 // FIXME: AddClone might result in many many (safe) warnings printed by TTree::CopyAddresses, see ROOT-9487.
1313 const auto friendsListPtr = fInputTrees[slot]->GetListOfFriends();
1314 if (friendsListPtr && friendsListPtr->GetEntries() > 0)
1315 fInputTrees[slot]->AddClone(fOutputTrees[slot].get());
1316 }
1317 fIsFirstEvent[slot] = 1; // reset first event flag for this slot
1318 }
1319
1320 void FinalizeTask(unsigned int slot)
1321 {
1322 if (fOutputTrees[slot]->GetEntries() > 0)
1323 fOutputFiles[slot]->Write();
1324 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
1325 fOutputTrees[slot].reset(nullptr);
1326 }
1327
1328 void Exec(unsigned int slot, BranchTypes &... values)
1329 {
1330 using ind_t = std::index_sequence_for<BranchTypes...>;
1331 if (!fIsFirstEvent[slot]) {
1332 UpdateCArraysPtrs(slot, values..., ind_t{});
1333 } else {
1334 SetBranches(slot, values..., ind_t{});
1335 fIsFirstEvent[slot] = 0;
1336 }
1337 UpdateBoolArrays(slot, values..., ind_t{});
1338 fOutputTrees[slot]->Fill();
1339 auto entries = fOutputTrees[slot]->GetEntries();
1340 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
1341 if ((autoFlush > 0) && (entries % autoFlush == 0))
1342 fOutputFiles[slot]->Write();
1343 }
1344
1345 template <std::size_t... S>
1346 void UpdateCArraysPtrs(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1347 {
1348 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1349 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1350 // leaving associated to the branch of the output tree an invalid pointer.
1351 // With this code, we set the value of the pointer in the output branch anew when needed.
1352 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1353 // we need an int for the expander list.
1354 int expander[] = {(fBranches[slot][S] && fBranchAddresses[slot][S] != GetData(values)
1355 ? fBranches[slot][S]->SetAddress(GetData(values)),
1356 fBranchAddresses[slot][S] = GetData(values), 0 : 0, 0)...,
1357 0};
1358 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1359 }
1360
1361 template <std::size_t... S>
1362 void SetBranches(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1363 {
1364 // hack to call TTree::Branch on all variadic template arguments
1365 int expander[] = {
1366 (SetBranchesHelper(fBoolArrays[slot], fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
1367 fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S], &values),
1368 0)...,
1369 0};
1370 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1371 (void)slot; // avoid unused variable warnings in gcc6.2
1372 }
1373
1374 template <std::size_t... S>
1375 void UpdateBoolArrays(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1376 {
1377 int expander[] = {
1378 (UpdateBoolArray(fBoolArrays[slot], values, fOutputBranchNames[S], *fOutputTrees[slot]), 0)..., 0};
1379 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1380 }
1381
1382 void Initialize()
1383 {
1384 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
1385 fMerger = std::make_unique<ROOT::Experimental::TBufferMerger>(fFileName.c_str(), fOptions.fMode.c_str(), cs);
1386 }
1387
1388 void Finalize()
1389 {
1390 auto fileWritten = false;
1391 for (auto &file : fOutputFiles) {
1392 if (file) {
1393 file->Write();
1394 file->Close();
1395 fileWritten = true;
1396 }
1397 }
1398
1399 if (!fileWritten) {
1400 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1401 }
1402
1403 // flush all buffers to disk by destroying the TBufferMerger
1404 fOutputFiles.clear();
1405 fMerger.reset();
1406 }
1407
1408 std::string GetActionName() { return "Snapshot"; }
1409};
1410
1411template <typename Acc, typename Merge, typename R, typename T, typename U,
1412 bool MustCopyAssign = std::is_same<R, U>::value>
1413class AggregateHelper : public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1414 Acc fAggregate;
1415 Merge fMerge;
1416 const std::shared_ptr<U> fResult;
1417 Results<U> fAggregators;
1418
1419public:
1420 using ColumnTypes_t = TypeList<T>;
1421 AggregateHelper(Acc &&f, Merge &&m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1422 : fAggregate(std::move(f)), fMerge(std::move(m)), fResult(result), fAggregators(nSlots, *result)
1423 {
1424 }
1425 AggregateHelper(AggregateHelper &&) = default;
1426 AggregateHelper(const AggregateHelper &) = delete;
1427
1428 void InitTask(TTreeReader *, unsigned int) {}
1429
1431 void Exec(unsigned int slot, const T &value)
1432 {
1433 fAggregators[slot] = fAggregate(fAggregators[slot], value);
1434 }
1435
1437 void Exec(unsigned int slot, const T &value)
1438 {
1439 fAggregate(fAggregators[slot], value);
1440 }
1441
1442 void Initialize() { /* noop */}
1443
1444 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1445 bool MergeAll = std::is_same<void, MergeRet>::value>
1446 typename std::enable_if<MergeAll, void>::type Finalize()
1447 {
1448 fMerge(fAggregators);
1449 *fResult = fAggregators[0];
1450 }
1451
1452 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1453 bool MergeTwoByTwo = std::is_same<U, MergeRet>::value>
1454 typename std::enable_if<MergeTwoByTwo, void>::type Finalize(...) // ... needed to let compiler distinguish overloads
1455 {
1456 for (const auto &acc : fAggregators)
1457 *fResult = fMerge(*fResult, acc);
1458 }
1459
1460 U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; }
1461
1462 std::string GetActionName() { return "Aggregate"; }
1463};
1464
1465} // end of NS RDF
1466} // end of NS Internal
1467} // end of NS ROOT
1468
1469/// \endcond
1470
1471#endif
SVector< double, 2 > v
Definition: Dict.h:5
Handle_t Display_t
Definition: GuiTypes.h:26
ROOT::R::TRInterface & r
Definition: Object.C:4
#define d(i)
Definition: RSha256.hxx:102
#define b(i)
Definition: RSha256.hxx:100
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define g(i)
Definition: RSha256.hxx:105
#define h(i)
Definition: RSha256.hxx:106
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
static const double x2[5]
static const double x1[5]
static const double x3[11]
unsigned long long ULong64_t
Definition: RtypesCore.h:70
void Warning(const char *location, const char *msgfmt,...)
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
Binding & operator=(OUT(*fun)(void))
typedef void((*Func_t)())
This class is the textual representation of the content of a columnar dataset.
Definition: RDisplay.hxx:64
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition: RVec.hxx:272
iterator end() noexcept
Definition: RVec.hxx:387
const Impl_t & AsVector() const
Definition: RVec.hxx:348
iterator begin() noexcept
Definition: RVec.hxx:384
A TTree is a list of TBranches.
Definition: TBranch.h:65
Describe directory structure in memory.
Definition: TDirectory.h:34
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3980
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:614
The TH1 histogram class.
Definition: TH1.h:56
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition: TLeaf.h:49
A doubly linked list.
Definition: TList.h:44
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:785
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:44
A TTree represents a columnar dataset.
Definition: TTree.h:71
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
Definition: TTree.cxx:5075
virtual Int_t SetBranchAddress(const char *bname, void *add, TBranch **ptr=0)
Change branch address, dealing with clone trees properly.
Definition: TTree.cxx:8049
void AddClone(TTree *)
Add a cloned tree to our list of trees to be notified whenever we change our branch addresses or when...
Definition: TTree.cxx:1186
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1741
Double_t x[n]
Definition: legend1.C:17
basic_string_view< char > string_view
#define F(x, y, z)
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition: RDFUtils.cxx:270
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition: RDFUtils.cxx:230
double T(double x)
Definition: ChebyshevPol.h:34
ROOT type_traits extensions.
Definition: TypeTraits.hxx:23
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
@ kSTLvector
Definition: ESTLType.h:30
ROOT::Detail::RDF::ColumnNames_t ColumnNames_t
Definition: RDataFrame.cxx:788
int CompressionSettings(RCompressionSetting::EAlgorithm algorithm, int compressionLevel)
RooArgSet S(const RooAbsArg &v1)
RooCmdArg Columns(Int_t ncol)
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
Definition: file.py:1
const char * Size
Definition: TXMLSetup.cxx:55
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
int fCompressionLevel
Compression level of output file.
Check for container traits.
Definition: TypeTraits.hxx:98
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:27
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
void ws()
Definition: ws.C:66