Logo ROOT  
Reference Guide
ActionHelpers.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDFOPERATIONS
12#define ROOT_RDFOPERATIONS
13
14#include "Compression.h"
16#include "ROOT/RStringView.hxx"
17#include "ROOT/RVec.hxx"
18#include "ROOT/TBufferMerger.hxx" // for SnapshotHelper
20#include "ROOT/RDF/Utils.hxx"
21#include "ROOT/RMakeUnique.hxx"
23#include "ROOT/TypeTraits.hxx"
24#include "ROOT/RDF/RDisplay.hxx"
25#include "RtypesCore.h"
26#include "TBranch.h"
27#include "TClassEdit.h"
28#include "TDirectory.h"
29#include "TFile.h" // for SnapshotHelper
30#include "TH1.h"
31#include "TGraph.h"
32#include "TLeaf.h"
33#include "TObjArray.h"
34#include "TObject.h"
35#include "TTree.h"
36#include "TTreeReader.h" // for SnapshotHelper
37
38#include <algorithm>
39#include <limits>
40#include <memory>
41#include <stdexcept>
42#include <string>
43#include <type_traits>
44#include <vector>
45#include <iomanip>
46
47/// \cond HIDDEN_SYMBOLS
48
49namespace ROOT {
50namespace Detail {
51namespace RDF {
/// CRTP base class of the action helpers.
/// It offers CallFinalizeTask, which forwards to Helper::FinalizeTask when the helper declares such a
/// member and does nothing otherwise.
template <typename Helper>
class RActionImpl {
public:
   // Chosen only when `&T::FinalizeTask` is a valid expression (SFINAE on the trailing return type).
   template <typename T = Helper>
   auto CallFinalizeTask(unsigned int slot) -> decltype(&T::FinalizeTask, void())
   {
      auto &derived = static_cast<Helper &>(*this);
      derived.FinalizeTask(slot);
   }

   // Fallback used when the helper has no FinalizeTask member: the call is accepted and ignored.
   template <typename... Args>
   void CallFinalizeTask(unsigned int, Args...)
   {
   }
};
66
67} // namespace RDF
68} // namespace Detail
69
70namespace Internal {
71namespace RDF {
72using namespace ROOT::TypeTraits;
73using namespace ROOT::VecOps;
74using namespace ROOT::RDF;
75using namespace ROOT::Detail::RDF;
76
77using Hist_t = ::TH1D;
78
/// The container type used for each thread's partial result in an action helper.
// std::vector<bool> must not be instantiated here, because it makes it impossible to return a reference to one
// of the thread-local results; a std::deque is selected for bool instead. Having one common definition of the
// container type also makes it easy to swap the underlying container, e.g. if problems with false sharing of
// the thread-local results show up.
template <typename T>
using Results =
   typename std::conditional<std::is_same<T, bool>::value, std::deque<T>, std::vector<T>>::type;
85
86template <typename F>
87class ForeachSlotHelper : public RActionImpl<ForeachSlotHelper<F>> {
88 F fCallable;
89
90public:
91 using ColumnTypes_t = RemoveFirstParameter_t<typename CallableTraits<F>::arg_types>;
92 ForeachSlotHelper(F &&f) : fCallable(f) {}
93 ForeachSlotHelper(ForeachSlotHelper &&) = default;
94 ForeachSlotHelper(const ForeachSlotHelper &) = delete;
95
96 void InitTask(TTreeReader *, unsigned int) {}
97
98 template <typename... Args>
99 void Exec(unsigned int slot, Args &&... args)
100 {
101 // check that the decayed types of Args are the same as the branch types
102 static_assert(std::is_same<TypeList<typename std::decay<Args>::type...>, ColumnTypes_t>::value, "");
103 fCallable(slot, std::forward<Args>(args)...);
104 }
105
106 void Initialize() { /* noop */}
107
108 void Finalize() { /* noop */}
109
110 std::string GetActionName() { return "ForeachSlot"; }
111};
112
113class CountHelper : public RActionImpl<CountHelper> {
114 const std::shared_ptr<ULong64_t> fResultCount;
115 Results<ULong64_t> fCounts;
116
117public:
118 using ColumnTypes_t = TypeList<>;
119 CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots);
120 CountHelper(CountHelper &&) = default;
121 CountHelper(const CountHelper &) = delete;
122 void InitTask(TTreeReader *, unsigned int) {}
123 void Exec(unsigned int slot);
124 void Initialize() { /* noop */}
125 void Finalize();
126 ULong64_t &PartialUpdate(unsigned int slot);
127
128 std::string GetActionName() { return "Count"; }
129};
130
131template <typename ProxiedVal_t>
132class ReportHelper : public RActionImpl<ReportHelper<ProxiedVal_t>> {
133 const std::shared_ptr<RCutFlowReport> fReport;
134 // Here we have a weak pointer since we need to keep track of the validity
135 // of the proxied node. It can happen that the user does not trigger the
136 // event loop by looking into the RResultPtr and the chain goes out of scope
137 // before the Finalize method is invoked.
138 std::weak_ptr<ProxiedVal_t> fProxiedWPtr;
139 bool fReturnEmptyReport;
140
141public:
142 using ColumnTypes_t = TypeList<>;
143 ReportHelper(const std::shared_ptr<RCutFlowReport> &report, const std::shared_ptr<ProxiedVal_t> &pp, bool emptyRep)
144 : fReport(report), fProxiedWPtr(pp), fReturnEmptyReport(emptyRep){};
145 ReportHelper(ReportHelper &&) = default;
146 ReportHelper(const ReportHelper &) = delete;
147 void InitTask(TTreeReader *, unsigned int) {}
148 void Exec(unsigned int /* slot */) {}
149 void Initialize() { /* noop */}
150 void Finalize()
151 {
152 // We need the weak_ptr in order to avoid crashes at tear down
153 if (!fReturnEmptyReport && !fProxiedWPtr.expired())
154 fProxiedWPtr.lock()->Report(*fReport);
155 }
156
157 std::string GetActionName() { return "Report"; }
158};
159
160class FillHelper : public RActionImpl<FillHelper> {
161 // this sets a total initial size of 16 MB for the buffers (can increase)
162 static constexpr unsigned int fgTotalBufSize = 2097152;
163 using BufEl_t = double;
164 using Buf_t = std::vector<BufEl_t>;
165
166 std::vector<Buf_t> fBuffers;
167 std::vector<Buf_t> fWBuffers;
168 const std::shared_ptr<Hist_t> fResultHist;
169 unsigned int fNSlots;
170 unsigned int fBufSize;
171 /// Histograms containing "snapshots" of partial results. Non-null only if a registered callback requires it.
172 Results<std::unique_ptr<Hist_t>> fPartialHists;
173 Buf_t fMin;
174 Buf_t fMax;
175
176 void UpdateMinMax(unsigned int slot, double v);
177
178public:
179 FillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots);
180 FillHelper(FillHelper &&) = default;
181 FillHelper(const FillHelper &) = delete;
182 void InitTask(TTreeReader *, unsigned int) {}
183 void Exec(unsigned int slot, double v);
184 void Exec(unsigned int slot, double v, double w);
185
186 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
187 void Exec(unsigned int slot, const T &vs)
188 {
189 auto &thisBuf = fBuffers[slot];
190 for (auto &v : vs) {
191 UpdateMinMax(slot, v);
192 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
193 }
194 }
195
196 template <typename T, typename W,
197 typename std::enable_if<IsContainer<T>::value && IsContainer<W>::value, int>::type = 0>
198 void Exec(unsigned int slot, const T &vs, const W &ws)
199 {
200 auto &thisBuf = fBuffers[slot];
201
202 for (auto &v : vs) {
203 UpdateMinMax(slot, v);
204 thisBuf.emplace_back(v);
205 }
206
207 auto &thisWBuf = fWBuffers[slot];
208 for (auto &w : ws) {
209 thisWBuf.emplace_back(w); // TODO: Can be optimised in case T == BufEl_t
210 }
211 }
212
213 template <typename T, typename W,
214 typename std::enable_if<IsContainer<T>::value && !IsContainer<W>::value, int>::type = 0>
215 void Exec(unsigned int slot, const T &vs, const W w)
216 {
217 auto &thisBuf = fBuffers[slot];
218 for (auto &v : vs) {
219 UpdateMinMax(slot, v);
220 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
221 }
222
223 auto &thisWBuf = fWBuffers[slot];
224 thisWBuf.insert(thisWBuf.end(), vs.size(), w);
225 }
226
227 // ROOT-10092: Filling with a scalar as first column and a collection as second is not supported
228 template <typename T, typename W,
229 typename std::enable_if<IsContainer<W>::value && !IsContainer<T>::value, int>::type = 0>
230 void Exec(unsigned int, const T &, const W &)
231 {
232 throw std::runtime_error(
233 "Cannot fill object if the type of the first column is a scalar and the one of the second a container.");
234 }
235
236 Hist_t &PartialUpdate(unsigned int);
237
238 void Initialize() { /* noop */}
239
240 void Finalize();
241
242 std::string GetActionName() { return "Fill"; }
243};
244
245extern template void FillHelper::Exec(unsigned int, const std::vector<float> &);
246extern template void FillHelper::Exec(unsigned int, const std::vector<double> &);
247extern template void FillHelper::Exec(unsigned int, const std::vector<char> &);
248extern template void FillHelper::Exec(unsigned int, const std::vector<int> &);
249extern template void FillHelper::Exec(unsigned int, const std::vector<unsigned int> &);
250extern template void FillHelper::Exec(unsigned int, const std::vector<float> &, const std::vector<float> &);
251extern template void FillHelper::Exec(unsigned int, const std::vector<double> &, const std::vector<double> &);
252extern template void FillHelper::Exec(unsigned int, const std::vector<char> &, const std::vector<char> &);
253extern template void FillHelper::Exec(unsigned int, const std::vector<int> &, const std::vector<int> &);
254extern template void
255FillHelper::Exec(unsigned int, const std::vector<unsigned int> &, const std::vector<unsigned int> &);
256
257template <typename HIST = Hist_t>
258class FillParHelper : public RActionImpl<FillParHelper<HIST>> {
259 std::vector<HIST *> fObjects;
260
261public:
262 FillParHelper(FillParHelper &&) = default;
263 FillParHelper(const FillParHelper &) = delete;
264
265 FillParHelper(const std::shared_ptr<HIST> &h, const unsigned int nSlots) : fObjects(nSlots, nullptr)
266 {
267 fObjects[0] = h.get();
268 // Initialise all other slots
269 for (unsigned int i = 1; i < nSlots; ++i) {
270 fObjects[i] = new HIST(*fObjects[0]);
271 if (auto objAsHist = dynamic_cast<TH1*>(fObjects[i])) {
272 objAsHist->SetDirectory(nullptr);
273 }
274 }
275 }
276
277 void InitTask(TTreeReader *, unsigned int) {}
278
279 void Exec(unsigned int slot, double x0) // 1D histos
280 {
281 fObjects[slot]->Fill(x0);
282 }
283
284 void Exec(unsigned int slot, double x0, double x1) // 1D weighted and 2D histos
285 {
286 fObjects[slot]->Fill(x0, x1);
287 }
288
289 void Exec(unsigned int slot, double x0, double x1, double x2) // 2D weighted and 3D histos
290 {
291 fObjects[slot]->Fill(x0, x1, x2);
292 }
293
294 void Exec(unsigned int slot, double x0, double x1, double x2, double x3) // 3D weighted histos
295 {
296 fObjects[slot]->Fill(x0, x1, x2, x3);
297 }
298
299 template <typename X0, typename std::enable_if<IsContainer<X0>::value, int>::type = 0>
300 void Exec(unsigned int slot, const X0 &x0s)
301 {
302 auto thisSlotH = fObjects[slot];
303 for (auto &x0 : x0s) {
304 thisSlotH->Fill(x0); // TODO: Can be optimised in case T == vector<double>
305 }
306 }
307
308 // ROOT-10092: Filling with a scalar as first column and a collection as second is not supported
309 template <typename X0, typename X1,
310 typename std::enable_if<IsContainer<X1>::value && !IsContainer<X0>::value, int>::type = 0>
311 void Exec(unsigned int , const X0 &, const X1 &)
312 {
313 throw std::runtime_error(
314 "Cannot fill object if the type of the first column is a scalar and the one of the second a container.");
315 }
316
317 template <typename X0, typename X1,
318 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value, int>::type = 0>
319 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
320 {
321 auto thisSlotH = fObjects[slot];
322 if (x0s.size() != x1s.size()) {
323 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
324 }
325 auto x0sIt = std::begin(x0s);
326 const auto x0sEnd = std::end(x0s);
327 auto x1sIt = std::begin(x1s);
328 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
329 thisSlotH->Fill(*x0sIt, *x1sIt); // TODO: Can be optimised in case T == vector<double>
330 }
331 }
332
333 template <typename X0, typename W,
334 typename std::enable_if<IsContainer<X0>::value && !IsContainer<W>::value, int>::type = 0>
335 void Exec(unsigned int slot, const X0 &x0s, const W w)
336 {
337 auto thisSlotH = fObjects[slot];
338 for (auto &&x : x0s) {
339 thisSlotH->Fill(x, w);
340 }
341 }
342
343 template <typename X0, typename X1, typename X2,
344 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value,
345 int>::type = 0>
346 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s)
347 {
348 auto thisSlotH = fObjects[slot];
349 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size())) {
350 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
351 }
352 auto x0sIt = std::begin(x0s);
353 const auto x0sEnd = std::end(x0s);
354 auto x1sIt = std::begin(x1s);
355 auto x2sIt = std::begin(x2s);
356 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++) {
357 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt); // TODO: Can be optimised in case T == vector<double>
358 }
359 }
360
361 template <typename X0, typename X1, typename W,
362 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && !IsContainer<W>::value,
363 int>::type = 0>
364 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const W w)
365 {
366 auto thisSlotH = fObjects[slot];
367 if (x0s.size() != x1s.size()) {
368 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
369 }
370 auto x0sIt = std::begin(x0s);
371 const auto x0sEnd = std::end(x0s);
372 auto x1sIt = std::begin(x1s);
373 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
374 thisSlotH->Fill(*x0sIt, *x1sIt, w); // TODO: Can be optimised in case T == vector<double>
375 }
376 }
377
378 template <typename X0, typename X1, typename X2, typename X3,
379 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value &&
381 int>::type = 0>
382 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s, const X3 &x3s)
383 {
384 auto thisSlotH = fObjects[slot];
385 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size() && x1s.size() == x3s.size())) {
386 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
387 }
388 auto x0sIt = std::begin(x0s);
389 const auto x0sEnd = std::end(x0s);
390 auto x1sIt = std::begin(x1s);
391 auto x2sIt = std::begin(x2s);
392 auto x3sIt = std::begin(x3s);
393 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++, x3sIt++) {
394 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt, *x3sIt); // TODO: Can be optimised in case T == vector<double>
395 }
396 }
397
398 template <typename X0, typename X1, typename X2, typename W,
399 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value &&
401 int>::type = 0>
402 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s, const W w)
403 {
404 auto thisSlotH = fObjects[slot];
405 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size())) {
406 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
407 }
408 auto x0sIt = std::begin(x0s);
409 const auto x0sEnd = std::end(x0s);
410 auto x1sIt = std::begin(x1s);
411 auto x2sIt = std::begin(x2s);
412 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++) {
413 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt, w);
414 }
415 }
416
417 void Initialize() { /* noop */}
418
419 void Finalize()
420 {
421 auto resObj = fObjects[0];
422 const auto nSlots = fObjects.size();
423 TList l;
424 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
425 for (unsigned int slot = 1; slot < nSlots; ++slot) {
426 l.Add(fObjects[slot]);
427 }
428
429 resObj->Merge(&l);
430 }
431
432 HIST &PartialUpdate(unsigned int slot) { return *fObjects[slot]; }
433
434 std::string GetActionName() { return "FillPar"; }
435};
436
437class FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> {
438public:
439 using Result_t = ::TGraph;
440
441private:
442 std::vector<::TGraph *> fGraphs;
443
444public:
445 FillTGraphHelper(FillTGraphHelper &&) = default;
446 FillTGraphHelper(const FillTGraphHelper &) = delete;
447
448 // The last parameter is always false, as at the moment there is no way to propagate the parameter from the user to
449 // this method
450 FillTGraphHelper(const std::shared_ptr<::TGraph> &g, const unsigned int nSlots) : fGraphs(nSlots, nullptr)
451 {
452 fGraphs[0] = g.get();
453 // Initialise all other slots
454 for (unsigned int i = 1; i < nSlots; ++i) {
455 fGraphs[i] = new TGraph(*fGraphs[0]);
456 }
457 }
458
459 void Initialize() {}
460 void InitTask(TTreeReader *, unsigned int) {}
461
462 template <typename X0, typename X1,
463 typename std::enable_if<
465 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
466 {
467 if (x0s.size() != x1s.size()) {
468 throw std::runtime_error("Cannot fill Graph with values in containers of different sizes.");
469 }
470 auto thisSlotG = fGraphs[slot];
471 auto x0sIt = std::begin(x0s);
472 const auto x0sEnd = std::end(x0s);
473 auto x1sIt = std::begin(x1s);
474 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
475 thisSlotG->SetPoint(thisSlotG->GetN(), *x0sIt, *x1sIt);
476 }
477 }
478
479 template <typename X0, typename X1>
480 void Exec(unsigned int slot, X0 x0, X1 x1)
481 {
482 auto thisSlotG = fGraphs[slot];
483 thisSlotG->SetPoint(thisSlotG->GetN(), x0, x1);
484 }
485
486 void Finalize()
487 {
488 const auto nSlots = fGraphs.size();
489 auto resGraph = fGraphs[0];
490 TList l;
491 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
492 for (unsigned int slot = 1; slot < nSlots; ++slot) {
493 l.Add(fGraphs[slot]);
494 }
495 resGraph->Merge(&l);
496 }
497
498 std::string GetActionName() { return "Graph"; }
499
500 Result_t &PartialUpdate(unsigned int slot) { return *fGraphs[slot]; }
501};
502
503// In case of the take helper we have 4 cases:
504// 1. The column is not an RVec, the collection is not a vector
505// 2. The column is not an RVec, the collection is a vector
506// 3. The column is an RVec, the collection is not a vector
507// 4. The column is an RVec, the collection is a vector
508
/// Append `value` at the end of the collection `coll` by means of emplace_back.
template <typename V, typename COLL>
void FillColl(V &&value, COLL &coll)
{
   coll.emplace_back(value);
}

// bool values are appended with push_back, since some compilers do not support emplace_back for them.
template <typename COLL>
void FillColl(bool value, COLL &coll)
{
   coll.push_back(value);
}
519
520// Case 1.: The column is not an RVec, the collection is not a vector
521// No optimisations, no transformations: just copies.
522template <typename RealT_t, typename T, typename COLL>
523class TakeHelper : public RActionImpl<TakeHelper<RealT_t, T, COLL>> {
524 Results<std::shared_ptr<COLL>> fColls;
525
526public:
527 using ColumnTypes_t = TypeList<T>;
528 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
529 {
530 fColls.emplace_back(resultColl);
531 for (unsigned int i = 1; i < nSlots; ++i)
532 fColls.emplace_back(std::make_shared<COLL>());
533 }
534 TakeHelper(TakeHelper &&);
535 TakeHelper(const TakeHelper &) = delete;
536
537 void InitTask(TTreeReader *, unsigned int) {}
538
539 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
540
541 void Initialize() { /* noop */}
542
543 void Finalize()
544 {
545 auto rColl = fColls[0];
546 for (unsigned int i = 1; i < fColls.size(); ++i) {
547 const auto &coll = fColls[i];
548 const auto end = coll->end();
549 // Use an explicit loop here to prevent compiler warnings introduced by
550 // clang's range-based loop analysis and vector<bool> references.
551 for (auto j = coll->begin(); j != end; j++) {
552 FillColl(*j, *rColl);
553 }
554 }
555 }
556
557 COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); }
558
559 std::string GetActionName() { return "Take"; }
560};
561
562// Case 2.: The column is not an RVec, the collection is a vector
563// Optimisations, no transformations: just copies.
564template <typename RealT_t, typename T>
565class TakeHelper<RealT_t, T, std::vector<T>> : public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
566 Results<std::shared_ptr<std::vector<T>>> fColls;
567
568public:
569 using ColumnTypes_t = TypeList<T>;
570 TakeHelper(const std::shared_ptr<std::vector<T>> &resultColl, const unsigned int nSlots)
571 {
572 fColls.emplace_back(resultColl);
573 for (unsigned int i = 1; i < nSlots; ++i) {
574 auto v = std::make_shared<std::vector<T>>();
575 v->reserve(1024);
576 fColls.emplace_back(v);
577 }
578 }
579 TakeHelper(TakeHelper &&);
580 TakeHelper(const TakeHelper &) = delete;
581
582 void InitTask(TTreeReader *, unsigned int) {}
583
584 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
585
586 void Initialize() { /* noop */}
587
588 // This is optimised to treat vectors
589 void Finalize()
590 {
591 ULong64_t totSize = 0;
592 for (auto &coll : fColls)
593 totSize += coll->size();
594 auto rColl = fColls[0];
595 rColl->reserve(totSize);
596 for (unsigned int i = 1; i < fColls.size(); ++i) {
597 auto &coll = fColls[i];
598 rColl->insert(rColl->end(), coll->begin(), coll->end());
599 }
600 }
601
602 std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; }
603
604 std::string GetActionName() { return "Take"; }
605};
606
607// Case 3.: The column is a RVec, the collection is not a vector
608// No optimisations, transformations from RVecs to vectors
609template <typename RealT_t, typename COLL>
610class TakeHelper<RealT_t, RVec<RealT_t>, COLL> : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
611 Results<std::shared_ptr<COLL>> fColls;
612
613public:
614 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
615 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
616 {
617 fColls.emplace_back(resultColl);
618 for (unsigned int i = 1; i < nSlots; ++i)
619 fColls.emplace_back(std::make_shared<COLL>());
620 }
621 TakeHelper(TakeHelper &&);
622 TakeHelper(const TakeHelper &) = delete;
623
624 void InitTask(TTreeReader *, unsigned int) {}
625
626 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
627
628 void Initialize() { /* noop */}
629
630 void Finalize()
631 {
632 auto rColl = fColls[0];
633 for (unsigned int i = 1; i < fColls.size(); ++i) {
634 auto &coll = fColls[i];
635 for (auto &v : *coll) {
636 rColl->emplace_back(v);
637 }
638 }
639 }
640
641 std::string GetActionName() { return "Take"; }
642};
643
644// Case 4.: The column is an RVec, the collection is a vector
645// Optimisations, transformations from RVecs to vectors
646template <typename RealT_t>
647class TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>
648 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
649
650 Results<std::shared_ptr<std::vector<std::vector<RealT_t>>>> fColls;
651
652public:
653 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
654 TakeHelper(const std::shared_ptr<std::vector<std::vector<RealT_t>>> &resultColl, const unsigned int nSlots)
655 {
656 fColls.emplace_back(resultColl);
657 for (unsigned int i = 1; i < nSlots; ++i) {
658 auto v = std::make_shared<std::vector<RealT_t>>();
659 v->reserve(1024);
660 fColls.emplace_back(v);
661 }
662 }
663 TakeHelper(TakeHelper &&);
664 TakeHelper(const TakeHelper &) = delete;
665
666 void InitTask(TTreeReader *, unsigned int) {}
667
668 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
669
670 void Initialize() { /* noop */}
671
672 // This is optimised to treat vectors
673 void Finalize()
674 {
675 ULong64_t totSize = 0;
676 for (auto &coll : fColls)
677 totSize += coll->size();
678 auto rColl = fColls[0];
679 rColl->reserve(totSize);
680 for (unsigned int i = 1; i < fColls.size(); ++i) {
681 auto &coll = fColls[i];
682 rColl->insert(rColl->end(), coll->begin(), coll->end());
683 }
684 }
685
686 std::string GetActionName() { return "Take"; }
687};
688
689// Extern templates for TakeHelper
690// NOTE: The move-constructor of specializations declared as extern templates
691// must be defined out of line, otherwise cling fails to find its symbol.
692template <typename RealT_t, typename T, typename COLL>
693TakeHelper<RealT_t, T, COLL>::TakeHelper(TakeHelper<RealT_t, T, COLL> &&) = default;
694template <typename RealT_t, typename T>
695TakeHelper<RealT_t, T, std::vector<T>>::TakeHelper(TakeHelper<RealT_t, T, std::vector<T>> &&) = default;
696template <typename RealT_t, typename COLL>
697TakeHelper<RealT_t, RVec<RealT_t>, COLL>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, COLL> &&) = default;
698template <typename RealT_t>
699TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>> &&) = default;
700
701// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
702#if __GNUC__ > 5
703extern template class TakeHelper<bool, bool, std::vector<bool>>;
704extern template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
705extern template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
706extern template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
707extern template class TakeHelper<int, int, std::vector<int>>;
708extern template class TakeHelper<long, long, std::vector<long>>;
709extern template class TakeHelper<long long, long long, std::vector<long long>>;
710extern template class TakeHelper<float, float, std::vector<float>>;
711extern template class TakeHelper<double, double, std::vector<double>>;
712#endif
713
714
715template <typename ResultType>
716class MinHelper : public RActionImpl<MinHelper<ResultType>> {
717 const std::shared_ptr<ResultType> fResultMin;
718 Results<ResultType> fMins;
719
720public:
721 MinHelper(MinHelper &&) = default;
722 MinHelper(const std::shared_ptr<ResultType> &minVPtr, const unsigned int nSlots)
723 : fResultMin(minVPtr), fMins(nSlots, std::numeric_limits<ResultType>::max())
724 {
725 }
726
727 void Exec(unsigned int slot, ResultType v) { fMins[slot] = std::min(v, fMins[slot]); }
728
729 void InitTask(TTreeReader *, unsigned int) {}
730
731 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
732 void Exec(unsigned int slot, const T &vs)
733 {
734 for (auto &&v : vs)
735 fMins[slot] = std::min(v, fMins[slot]);
736 }
737
738 void Initialize() { /* noop */}
739
740 void Finalize()
741 {
742 *fResultMin = std::numeric_limits<ResultType>::max();
743 for (auto &m : fMins)
744 *fResultMin = std::min(m, *fResultMin);
745 }
746
747 ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; }
748
749 std::string GetActionName() { return "Min"; }
750};
751
752// TODO
753// extern template void MinHelper::Exec(unsigned int, const std::vector<float> &);
754// extern template void MinHelper::Exec(unsigned int, const std::vector<double> &);
755// extern template void MinHelper::Exec(unsigned int, const std::vector<char> &);
756// extern template void MinHelper::Exec(unsigned int, const std::vector<int> &);
757// extern template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
758
759template <typename ResultType>
760class MaxHelper : public RActionImpl<MaxHelper<ResultType>> {
761 const std::shared_ptr<ResultType> fResultMax;
762 Results<ResultType> fMaxs;
763
764public:
765 MaxHelper(MaxHelper &&) = default;
766 MaxHelper(const MaxHelper &) = delete;
767 MaxHelper(const std::shared_ptr<ResultType> &maxVPtr, const unsigned int nSlots)
768 : fResultMax(maxVPtr), fMaxs(nSlots, std::numeric_limits<ResultType>::lowest())
769 {
770 }
771
772 void InitTask(TTreeReader *, unsigned int) {}
773 void Exec(unsigned int slot, ResultType v) { fMaxs[slot] = std::max(v, fMaxs[slot]); }
774
775 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
776 void Exec(unsigned int slot, const T &vs)
777 {
778 for (auto &&v : vs)
779 fMaxs[slot] = std::max((ResultType)v, fMaxs[slot]);
780 }
781
782 void Initialize() { /* noop */}
783
784 void Finalize()
785 {
786 *fResultMax = std::numeric_limits<ResultType>::lowest();
787 for (auto &m : fMaxs) {
788 *fResultMax = std::max(m, *fResultMax);
789 }
790 }
791
792 ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; }
793
794 std::string GetActionName() { return "Max"; }
795};
796
797// TODO
798// extern template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
799// extern template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
800// extern template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
801// extern template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
802// extern template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
803
804template <typename ResultType>
805class SumHelper : public RActionImpl<SumHelper<ResultType>> {
806 const std::shared_ptr<ResultType> fResultSum;
807 Results<ResultType> fSums;
808
809 /// Evaluate neutral element for this type and the sum operation.
810 /// This is assumed to be any_value - any_value if operator- is defined
811 /// for the type, otherwise a default-constructed ResultType{} is used.
812 template <typename T = ResultType>
813 auto NeutralElement(const T &v, int /*overloadresolver*/) -> decltype(v - v)
814 {
815 return v - v;
816 }
817
818 template <typename T = ResultType, typename Dummy = int>
819 ResultType NeutralElement(const T &, Dummy) // this overload has lower priority thanks to the template arg
820 {
821 return ResultType{};
822 }
823
824public:
825 SumHelper(SumHelper &&) = default;
826 SumHelper(const SumHelper &) = delete;
827 SumHelper(const std::shared_ptr<ResultType> &sumVPtr, const unsigned int nSlots)
828 : fResultSum(sumVPtr), fSums(nSlots, NeutralElement(*sumVPtr, -1))
829 {
830 }
831
832 void InitTask(TTreeReader *, unsigned int) {}
833 void Exec(unsigned int slot, ResultType v) { fSums[slot] += v; }
834
835 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
836 void Exec(unsigned int slot, const T &vs)
837 {
838 for (auto &&v : vs)
839 fSums[slot] += static_cast<ResultType>(v);
840 }
841
842 void Initialize() { /* noop */}
843
844 void Finalize()
845 {
846 for (auto &m : fSums)
847 *fResultSum += m;
848 }
849
850 ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; }
851
852 std::string GetActionName() { return "Sum"; }
853};
854
855class MeanHelper : public RActionImpl<MeanHelper> {
856 const std::shared_ptr<double> fResultMean;
857 std::vector<ULong64_t> fCounts;
858 std::vector<double> fSums;
859 std::vector<double> fPartialMeans;
860
861public:
862 MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
863 MeanHelper(MeanHelper &&) = default;
864 MeanHelper(const MeanHelper &) = delete;
865 void InitTask(TTreeReader *, unsigned int) {}
866 void Exec(unsigned int slot, double v);
867
868 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
869 void Exec(unsigned int slot, const T &vs)
870 {
871 for (auto &&v : vs) {
872 fSums[slot] += v;
873 fCounts[slot]++;
874 }
875 }
876
877 void Initialize() { /* noop */}
878
879 void Finalize();
880
881 double &PartialUpdate(unsigned int slot);
882
883 std::string GetActionName() { return "Mean"; }
884};
885
886extern template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
887extern template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
888extern template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
889extern template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
890extern template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
891
892class StdDevHelper : public RActionImpl<StdDevHelper> {
893 // Number of subsets of data
894 const unsigned int fNSlots;
895 const std::shared_ptr<double> fResultStdDev;
896 // Number of element for each slot
897 std::vector<ULong64_t> fCounts;
898 // Mean of each slot
899 std::vector<double> fMeans;
900 // Squared distance from the mean
901 std::vector<double> fDistancesfromMean;
902
903public:
904 StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
905 StdDevHelper(StdDevHelper &&) = default;
906 StdDevHelper(const StdDevHelper &) = delete;
907 void InitTask(TTreeReader *, unsigned int) {}
908 void Exec(unsigned int slot, double v);
909
910 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
911 void Exec(unsigned int slot, const T &vs)
912 {
913 for (auto &&v : vs) {
914 Exec(slot, v);
915 }
916 }
917
918 void Initialize() { /* noop */}
919
920 void Finalize();
921
922 std::string GetActionName() { return "StdDev"; }
923};
924
925extern template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
926extern template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
927extern template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
928extern template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
929extern template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
930
931template <typename PrevNodeType>
932class DisplayHelper : public RActionImpl<DisplayHelper<PrevNodeType>> {
933private:
935 const std::shared_ptr<Display_t> fDisplayerHelper;
936 const std::shared_ptr<PrevNodeType> fPrevNode;
937
938public:
939 DisplayHelper(const std::shared_ptr<Display_t> &d, const std::shared_ptr<PrevNodeType> &prevNode)
940 : fDisplayerHelper(d), fPrevNode(prevNode)
941 {
942 }
943 DisplayHelper(DisplayHelper &&) = default;
944 DisplayHelper(const DisplayHelper &) = delete;
945 void InitTask(TTreeReader *, unsigned int) {}
946
947 template <typename... Columns>
948 void Exec(unsigned int, Columns... columns)
949 {
950 fDisplayerHelper->AddRow(columns...);
951 if (!fDisplayerHelper->HasNext()) {
952 fPrevNode->StopProcessing();
953 }
954 }
955
956 void Initialize() {}
957
958 void Finalize() {}
959
960 std::string GetActionName() { return "Display"; }
961};
962
// std::vector<bool> is special, and not in a good way. As a consequence Snapshot of RVec<bool> needs to be treated
// specially. In particular, if RVec<bool> is filled with a (fixed or variable size) boolean array coming from
// a ROOT file, when writing out the corresponding branch from a Snapshot we do not have an address to set for the
// TTree branch (std::vector<bool> and, consequently, RVec<bool> do not provide a `data()` method).
// BoolArray is a lightweight wrapper around a C array of booleans that is meant to provide a stable address for the
// output TTree to read the contents of the snapshotted branches at Fill time.
969class BoolArray {
970 std::size_t fSize = 0;
971 bool *fBools = nullptr;
972
973 bool *CopyVector(const RVec<bool> &v)
974 {
975 auto b = new bool[fSize];
976 std::copy(v.begin(), v.end(), b);
977 return b;
978 }
979
980 bool *CopyArray(bool *o, std::size_t size)
981 {
982 auto b = new bool[size];
983 for (auto i = 0u; i < size; ++i)
984 b[i] = o[i];
985 return b;
986 }
987
988public:
989 // this generic constructor could be replaced with a constexpr if in SetBranchesHelper
990 BoolArray() = default;
991 template <typename T>
992 BoolArray(const T &) { throw std::runtime_error("This constructor should never be called"); }
993 BoolArray(const RVec<bool> &v) : fSize(v.size()), fBools(CopyVector(v)) {}
994 BoolArray(const BoolArray &b)
995 {
996 CopyArray(b.fBools, b.fSize);
997 }
998 BoolArray &operator=(const BoolArray &b)
999 {
1000 delete[] fBools;
1001 CopyArray(b.fBools, b.fSize);
1002 return *this;
1003 }
1004 BoolArray(BoolArray &&b)
1005 {
1006 fSize = b.fSize;
1007 fBools = b.fBools;
1008 b.fSize = 0;
1009 b.fBools = nullptr;
1010 }
1011 BoolArray &operator=(BoolArray &&b)
1012 {
1013 delete[] fBools;
1014 fSize = b.fSize;
1015 fBools = b.fBools;
1016 b.fSize = 0;
1017 b.fBools = nullptr;
1018 return *this;
1019 }
1020 ~BoolArray() { delete[] fBools; }
1021 std::size_t Size() const { return fSize; }
1022 bool *Data() { return fBools; }
1023};
1024using BoolArrayMap = std::map<std::string, BoolArray>;
1025
1026inline bool *UpdateBoolArrayIfBool(BoolArrayMap &boolArrays, RVec<bool> &v, const std::string &outName)
1027{
1028 // create a boolArrays entry
1029 boolArrays[outName] = BoolArray(v);
1030 return boolArrays[outName].Data();
1031}
1032
1033template <typename T>
1034T *UpdateBoolArrayIfBool(BoolArrayMap &, RVec<T> &v, const std::string &)
1035{
1036 return v.data();
1037}
1038
1039// Helper which gets the return value of the data() method if the type is an
1040// RVec (of anything but a bool), nullptr otherwise.
1041inline void *GetData(ROOT::VecOps::RVec<bool> & /*v*/)
1042{
1043 return nullptr;
1044}
1045
1046template <typename T>
1047void *GetData(ROOT::VecOps::RVec<T> &v)
1048{
1049 return v.data();
1050}
1051
/// GetData fallback for any argument type: there is no data address to report, so return nullptr.
template <typename T>
void *GetData(T & /*v*/)
{
   void *const noAddress = nullptr;
   return noAddress;
}
1057
1058
1059template <typename T>
1060void SetBranchesHelper(BoolArrayMap &, TTree * /*inputTree*/, TTree &outputTree, const std::string & /*validName*/,
1061 const std::string &name, TBranch *& branch, void *& branchAddress, T *address)
1062{
1063 outputTree.Branch(name.c_str(), address);
1064 branch = nullptr;
1065 branchAddress = nullptr;
1066}
1067
1068/// Helper function for SnapshotHelper and SnapshotHelperMT. It creates new branches for the output TTree of a Snapshot.
1069/// This overload is called for columns of type `RVec<T>`. For RDF, these can represent:
1070/// 1. c-style arrays in ROOT files, so we are sure that there are input trees to which we can ask the correct branch title
1071/// 2. RVecs coming from a custom column or a source
1072/// 3. vectors coming from ROOT files
1073/// 4. TClonesArray
1074///
1075/// In case of 1., we keep aside the pointer to the branch and the pointer to the input value (in `branch` and
1076/// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch.
1077template <typename T>
1078void SetBranchesHelper(BoolArrayMap &boolArrays, TTree *inputTree, TTree &outputTree, const std::string &inName,
1079 const std::string &outName, TBranch *&branch, void *&branchAddress, RVec<T> *ab)
1080{
1081 auto *const inputBranch = inputTree ? inputTree->GetBranch(inName.c_str()) : nullptr;
1082 const bool isTClonesArray = inputBranch != nullptr && std::string(inputBranch->GetClassName()) == "TClonesArray";
1083 const auto mustWriteStdVec = !inputBranch || isTClonesArray ||
1084 ROOT::ESTLType::kSTLvector == TClassEdit::IsSTLCont(inputBranch->GetClassName());
1085
1086 if (mustWriteStdVec) {
1087 // Treat:
1088 // 2. RVec coming from a custom column or a source
1089 // 3. RVec coming from a column on disk of type vector (the RVec is adopting the data of that vector)
1090 // 4. TClonesArray.
1091 // In all cases, we write out a std::vector<T> when the column is RVec<T>
1092 if (isTClonesArray) {
1093 Warning("Snapshot",
1094 "Branch \"%s\" contains TClonesArrays but the type specified to Snapshot was RVec<T>. The branch will "
1095 "be written out as a std::vector instead of a TClonesArray. Specify that the type of the branch is "
1096 "TClonesArray as a Snapshot template parameter to write out a TClonesArray instead.", inName.c_str());
1097 }
1098 outputTree.Branch(outName.c_str(), &ab->AsVector());
1099 return;
1100 }
1101
1102 // Treat 1, the C-array case
1103 auto *const leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
1104 const auto bname = leaf->GetName();
1105 const auto counterStr =
1106 leaf->GetLeafCount() ? std::string(leaf->GetLeafCount()->GetName()) : std::to_string(leaf->GetLenStatic());
1107 const auto btype = leaf->GetTypeName();
1108 const auto rootbtype = TypeName2ROOTTypeName(btype);
1109 const auto leaflist = std::string(bname) + "[" + counterStr + "]/" + rootbtype;
1110
1111 /// RVec<bool> is special because std::vector<bool> is special. In particular, it has no `data()`,
1112 /// so we need to explicitly manage storage of the data that the tree needs to Fill branches with.
1113 auto dataPtr = UpdateBoolArrayIfBool(boolArrays, *ab, outName);
1114
1115 auto *const outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str());
1116 outputBranch->SetTitle(inputBranch->GetTitle());
1117
1118 // Record the branch ptr and the address associated to it if this is not a bool array
1119 if (!std::is_same<bool, T>::value) {
1120 branch = outputBranch;
1121 branchAddress = GetData(*ab);
1122 }
1123}
1124
1125// generic version, no-op
1126template <typename T>
1127void UpdateBoolArray(BoolArrayMap &, T&, const std::string &, TTree &) {}
1128
1129// RVec<bool> overload, update boolArrays if needed
1130inline void UpdateBoolArray(BoolArrayMap &boolArrays, RVec<bool> &v, const std::string &outName, TTree &t)
1131{
1132 if (v.size() > boolArrays[outName].Size()) {
1133 boolArrays[outName] = BoolArray(v); // resize and copy
1134 t.SetBranchAddress(outName.c_str(), boolArrays[outName].Data());
1135 }
1136 else {
1137 std::copy(v.begin(), v.end(), boolArrays[outName].Data()); // just copy
1138 }
1139}
1140
1141/// Helper object for a single-thread Snapshot action
1142template <typename... BranchTypes>
1143class SnapshotHelper : public RActionImpl<SnapshotHelper<BranchTypes...>> {
1144 const std::string fFileName;
1145 const std::string fDirName;
1146 const std::string fTreeName;
1147 const RSnapshotOptions fOptions;
1148 std::unique_ptr<TFile> fOutputFile;
1149 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
1150 bool fIsFirstEvent{true};
1151 const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1152 const ColumnNames_t fOutputBranchNames;
1153 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
1154 BoolArrayMap fBoolArrays; // Storage for C arrays of bools to be written out
1155 std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
1156 std::vector<void *> fBranchAddresses; // Addresses associated to output branches, non-null only for the ones holding C arrays
1157
1158public:
1159 using ColumnTypes_t = TypeList<BranchTypes...>;
1160 SnapshotHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
1161 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options)
1162 : fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1163 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fBranches(vbnames.size(), nullptr),
1164 fBranchAddresses(vbnames.size(), nullptr)
1165 {
1166 }
1167
1168 SnapshotHelper(const SnapshotHelper &) = delete;
1169 SnapshotHelper(SnapshotHelper &&) = default;
1170
1171 void InitTask(TTreeReader *r, unsigned int /* slot */)
1172 {
1173 if (!r) // empty source, nothing to do
1174 return;
1175 fInputTree = r->GetTree();
1176 // AddClone guarantees that if the input file changes the branches of the output tree are updated with the new
1177 // addresses of the branch values
1178 fInputTree->AddClone(fOutputTree.get());
1179 }
1180
1181 void Exec(unsigned int /* slot */, BranchTypes &... values)
1182 {
1183 using ind_t = std::index_sequence_for<BranchTypes...>;
1184 if (! fIsFirstEvent) {
1185 UpdateCArraysPtrs(values..., ind_t{});
1186 } else {
1187 SetBranches(values..., ind_t{});
1188 fIsFirstEvent = false;
1189 }
1190 UpdateBoolArrays(values..., ind_t{});
1191 fOutputTree->Fill();
1192 }
1193
1194 template <std::size_t... S>
1195 void UpdateCArraysPtrs(BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1196 {
1197 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1198 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1199 // leaving associated to the branch of the output tree an invalid pointer.
1200 // With this code, we set the value of the pointer in the output branch anew when needed.
1201 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1202 // we need an int for the expander list.
1203 int expander[] = {(fBranches[S] && fBranchAddresses[S] != GetData(values)
1204 ? fBranches[S]->SetAddress(GetData(values)),
1205 fBranchAddresses[S] = GetData(values), 0 : 0, 0)...,
1206 0};
1207 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1208 }
1209
1210 template <std::size_t... S>
1211 void SetBranches(BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1212 {
1213 // create branches in output tree (and fill fBoolArrays for RVec<bool> columns)
1214 int expander[] = {(SetBranchesHelper(fBoolArrays, fInputTree, *fOutputTree, fInputBranchNames[S],
1215 fOutputBranchNames[S], fBranches[S], fBranchAddresses[S], &values),
1216 0)...,
1217 0};
1218 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1219 }
1220
1221 template <std::size_t... S>
1222 void UpdateBoolArrays(BranchTypes &...values, std::index_sequence<S...> /*dummy*/)
1223 {
1224 int expander[] = {(UpdateBoolArray(fBoolArrays, values, fOutputBranchNames[S], *fOutputTree), 0)..., 0};
1225 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1226 }
1227
1228 void Initialize()
1229 {
1230 fOutputFile.reset(
1231 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
1233
1234 if (!fDirName.empty()) {
1235 fOutputFile->mkdir(fDirName.c_str());
1236 fOutputFile->cd(fDirName.c_str());
1237 }
1238
1239 fOutputTree =
1240 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/fOutputFile.get());
1241
1242 if (fOptions.fAutoFlush)
1243 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
1244 }
1245
1246 void Finalize()
1247 {
1248 if (fOutputFile && fOutputTree) {
1249 ::TDirectory::TContext ctxt(fOutputFile->GetDirectory(fDirName.c_str()));
1250 fOutputTree->Write();
1251 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
1252 fOutputTree.reset();
1253 fOutputFile->Close();
1254 } else {
1255 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1256 }
1257 }
1258
1259 std::string GetActionName() { return "Snapshot"; }
1260};
1261
1262/// Helper object for a multi-thread Snapshot action
1263template <typename... BranchTypes>
1264class SnapshotHelperMT : public RActionImpl<SnapshotHelperMT<BranchTypes...>> {
1265 const unsigned int fNSlots;
1266 std::unique_ptr<ROOT::Experimental::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
1267 std::vector<std::shared_ptr<ROOT::Experimental::TBufferMergerFile>> fOutputFiles;
1268 std::vector<std::unique_ptr<TTree>> fOutputTrees;
1269 std::vector<int> fIsFirstEvent; // vector<bool> does not allow concurrent writing of different elements
1270 const std::string fFileName; // name of the output file name
1271 const std::string fDirName; // name of TFile subdirectory in which output must be written (possibly empty)
1272 const std::string fTreeName; // name of output tree
1273 const RSnapshotOptions fOptions; // struct holding options to pass down to TFile and TTree in this action
1274 const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1275 const ColumnNames_t fOutputBranchNames;
1276 std::vector<TTree *> fInputTrees; // Current input trees. Set at initialization time (`InitTask`)
1277 std::vector<BoolArrayMap> fBoolArrays; // Per-thread storage for C arrays of bools to be written out
1278 // Addresses of branches in output per slot, non-null only for the ones holding C arrays
1279 std::vector<std::vector<TBranch *>> fBranches;
1280 // Addresses associated to output branches per slot, non-null only for the ones holding C arrays
1281 std::vector<std::vector<void *>> fBranchAddresses;
1282
1283public:
1284 using ColumnTypes_t = TypeList<BranchTypes...>;
1285 SnapshotHelperMT(const unsigned int nSlots, std::string_view filename, std::string_view dirname,
1286 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
1287 const RSnapshotOptions &options)
1288 : fNSlots(nSlots), fOutputFiles(fNSlots), fOutputTrees(fNSlots), fIsFirstEvent(fNSlots, 1), fFileName(filename),
1289 fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1290 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fInputTrees(fNSlots), fBoolArrays(fNSlots),
1291 fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
1292 fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr))
1293 {
1294 TString checkupdate = fOptions.fMode;
1295 checkupdate.ToLower();
1296 if (checkupdate == "update") {
1297 throw std::invalid_argument("Snapshot: fMode == \"update\" not supported when implicit MT is enabled");
1298 }
1299 }
1300 SnapshotHelperMT(const SnapshotHelperMT &) = delete;
1301 SnapshotHelperMT(SnapshotHelperMT &&) = default;
1302
1303 void InitTask(TTreeReader *r, unsigned int slot)
1304 {
1305 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
1306 if (!fOutputFiles[slot]) {
1307 // first time this thread executes something, let's create a TBufferMerger output directory
1308 fOutputFiles[slot] = fMerger->GetFile();
1309 }
1310 TDirectory *treeDirectory = fOutputFiles[slot].get();
1311 if (!fDirName.empty()) {
1312 // call returnExistingDirectory=true since MT can end up making this call multiple times
1313 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
1314 }
1315 // re-create output tree as we need to create its branches again, with new input variables
1316 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
1317 fOutputTrees[slot] =
1318 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1319 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
1320 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
1321 fOutputTrees[slot]->SetImplicitMT(false);
1322 if (fOptions.fAutoFlush)
1323 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
1324 if (r) {
1325 // not an empty-source RDF
1326 fInputTrees[slot] = r->GetTree();
1327 // AddClone guarantees that if the input file changes the branches of the output tree are updated with the new
1328 // addresses of the branch values. We need this in case of friend trees with different cluster granularity
1329 // than the main tree.
1330 // FIXME: AddClone might result in many many (safe) warnings printed by TTree::CopyAddresses, see ROOT-9487.
1331 const auto friendsListPtr = fInputTrees[slot]->GetListOfFriends();
1332 if (friendsListPtr && friendsListPtr->GetEntries() > 0)
1333 fInputTrees[slot]->AddClone(fOutputTrees[slot].get());
1334 }
1335 fIsFirstEvent[slot] = 1; // reset first event flag for this slot
1336 }
1337
1338 void FinalizeTask(unsigned int slot)
1339 {
1340 if (fOutputTrees[slot]->GetEntries() > 0)
1341 fOutputFiles[slot]->Write();
1342 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
1343 fOutputTrees[slot].reset(nullptr);
1344 }
1345
1346 void Exec(unsigned int slot, BranchTypes &... values)
1347 {
1348 using ind_t = std::index_sequence_for<BranchTypes...>;
1349 if (!fIsFirstEvent[slot]) {
1350 UpdateCArraysPtrs(slot, values..., ind_t{});
1351 } else {
1352 SetBranches(slot, values..., ind_t{});
1353 fIsFirstEvent[slot] = 0;
1354 }
1355 UpdateBoolArrays(slot, values..., ind_t{});
1356 fOutputTrees[slot]->Fill();
1357 auto entries = fOutputTrees[slot]->GetEntries();
1358 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
1359 if ((autoFlush > 0) && (entries % autoFlush == 0))
1360 fOutputFiles[slot]->Write();
1361 }
1362
1363 template <std::size_t... S>
1364 void UpdateCArraysPtrs(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1365 {
1366 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1367 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1368 // leaving associated to the branch of the output tree an invalid pointer.
1369 // With this code, we set the value of the pointer in the output branch anew when needed.
1370 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1371 // we need an int for the expander list.
1372 (void)slot; // avoid bogus 'unused parameter' warning
1373 int expander[] = {(fBranches[slot][S] && fBranchAddresses[slot][S] != GetData(values)
1374 ? fBranches[slot][S]->SetAddress(GetData(values)),
1375 fBranchAddresses[slot][S] = GetData(values), 0 : 0, 0)...,
1376 0};
1377 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1378 }
1379
1380 template <std::size_t... S>
1381 void SetBranches(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1382 {
1383 // hack to call TTree::Branch on all variadic template arguments
1384 int expander[] = {
1385 (SetBranchesHelper(fBoolArrays[slot], fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
1386 fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S], &values),
1387 0)...,
1388 0};
1389 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1390 (void)slot; // avoid unused variable warnings in gcc6.2
1391 }
1392
1393 template <std::size_t... S>
1394 void UpdateBoolArrays(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1395 {
1396 (void)slot; // avoid bogus 'unused parameter' warning
1397 int expander[] = {
1398 (UpdateBoolArray(fBoolArrays[slot], values, fOutputBranchNames[S], *fOutputTrees[slot]), 0)..., 0};
1399 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1400 }
1401
1402 void Initialize()
1403 {
1404 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
1405 fMerger = std::make_unique<ROOT::Experimental::TBufferMerger>(fFileName.c_str(), fOptions.fMode.c_str(), cs);
1406 }
1407
1408 void Finalize()
1409 {
1410 auto fileWritten = false;
1411 for (auto &file : fOutputFiles) {
1412 if (file) {
1413 file->Write();
1414 file->Close();
1415 fileWritten = true;
1416 }
1417 }
1418
1419 if (!fileWritten) {
1420 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1421 }
1422
1423 // flush all buffers to disk by destroying the TBufferMerger
1424 fOutputFiles.clear();
1425 fMerger.reset();
1426 }
1427
1428 std::string GetActionName() { return "Snapshot"; }
1429};
1430
1431template <typename Acc, typename Merge, typename R, typename T, typename U,
1432 bool MustCopyAssign = std::is_same<R, U>::value>
1433class AggregateHelper : public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1434 Acc fAggregate;
1435 Merge fMerge;
1436 const std::shared_ptr<U> fResult;
1437 Results<U> fAggregators;
1438
1439public:
1440 using ColumnTypes_t = TypeList<T>;
1441 AggregateHelper(Acc &&f, Merge &&m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1442 : fAggregate(std::move(f)), fMerge(std::move(m)), fResult(result), fAggregators(nSlots, *result)
1443 {
1444 }
1445 AggregateHelper(AggregateHelper &&) = default;
1446 AggregateHelper(const AggregateHelper &) = delete;
1447
1448 void InitTask(TTreeReader *, unsigned int) {}
1449
1451 void Exec(unsigned int slot, const T &value)
1452 {
1453 fAggregators[slot] = fAggregate(fAggregators[slot], value);
1454 }
1455
1457 void Exec(unsigned int slot, const T &value)
1458 {
1459 fAggregate(fAggregators[slot], value);
1460 }
1461
1462 void Initialize() { /* noop */}
1463
1464 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1465 bool MergeAll = std::is_same<void, MergeRet>::value>
1466 typename std::enable_if<MergeAll, void>::type Finalize()
1467 {
1468 fMerge(fAggregators);
1469 *fResult = fAggregators[0];
1470 }
1471
1472 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1473 bool MergeTwoByTwo = std::is_same<U, MergeRet>::value>
1474 typename std::enable_if<MergeTwoByTwo, void>::type Finalize(...) // ... needed to let compiler distinguish overloads
1475 {
1476 for (const auto &acc : fAggregators)
1477 *fResult = fMerge(*fResult, acc);
1478 }
1479
1480 U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; }
1481
1482 std::string GetActionName() { return "Aggregate"; }
1483};
1484
1485} // end of NS RDF
1486} // end of NS Internal
1487} // end of NS ROOT
1488
1489/// \endcond
1490
1491#endif
Handle_t Display_t
Definition: GuiTypes.h:26
ROOT::R::TRInterface & r
Definition: Object.C:4
#define d(i)
Definition: RSha256.hxx:102
#define b(i)
Definition: RSha256.hxx:100
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define g(i)
Definition: RSha256.hxx:105
#define h(i)
Definition: RSha256.hxx:106
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
static const double x2[5]
static const double x1[5]
static const double x3[11]
unsigned long long ULong64_t
Definition: RtypesCore.h:70
void Warning(const char *location, const char *msgfmt,...)
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
Binding & operator=(OUT(*fun)(void))
typedef void((*Func_t)())
This class is the textual representation of the content of a columnar dataset.
Definition: RDisplay.hxx:64
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition: RVec.hxx:274
iterator end() noexcept
Definition: RVec.hxx:389
const Impl_t & AsVector() const
Definition: RVec.hxx:350
iterator begin() noexcept
Definition: RVec.hxx:386
A TTree is a list of TBranches.
Definition: TBranch.h:91
Small helper to keep current directory context.
Definition: TDirectory.h:41
Describe directory structure in memory.
Definition: TDirectory.h:34
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3923
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:614
The TH1 histogram class.
Definition: TH1.h:56
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition: TLeaf.h:49
A doubly linked list.
Definition: TList.h:44
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:785
Basic string class.
Definition: TString.h:131
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1125
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:43
A TTree represents a columnar dataset.
Definition: TTree.h:72
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
Definition: TTree.cxx:5170
virtual Int_t SetBranchAddress(const char *bname, void *add, TBranch **ptr=0)
Change branch address, dealing with clone trees properly.
Definition: TTree.cxx:8127
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition: TTree.h:341
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition: TTree.h:249
void AddClone(TTree *)
Add a cloned tree to our list of trees to be notified whenever we change our branch addresses or when...
Definition: TTree.cxx:1188
Double_t x[n]
Definition: legend1.C:17
basic_string_view< char > string_view
#define F(x, y, z)
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition: RDFUtils.cxx:283
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition: RDFUtils.cxx:243
double T(double x)
Definition: ChebyshevPol.h:34
ROOT type_traits extensions.
Definition: TypeTraits.hxx:23
VSD Structures.
Definition: StringConv.hxx:21
@ kSTLvector
Definition: ESTLType.h:30
ROOT::Detail::RDF::ColumnNames_t ColumnNames_t
Definition: RDataFrame.cxx:788
int CompressionSettings(RCompressionSetting::EAlgorithm algorithm, int compressionLevel)
RooArgSet S(const RooAbsArg &v1)
RooCmdArg Columns(Int_t ncol)
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
Definition: file.py:1
const char * Size
Definition: TXMLSetup.cxx:55
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
int fCompressionLevel
Compression level of output file.
Check for container traits.
Definition: TypeTraits.hxx:98
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:27
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
void ws()
Definition: ws.C:66