Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
InterfaceUtils.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE_UTILS
12#define ROOT_RDF_TINTERFACE_UTILS
13
14#include "RColumnRegister.hxx"
15#include <ROOT/RDF/RAction.hxx>
16#include <ROOT/RDF/ActionHelpers.hxx> // for BuildAction
18#include <ROOT/RDF/RDefine.hxx>
20#include <ROOT/RDF/RFilter.hxx>
21#include <ROOT/RDF/Utils.hxx>
27#include <ROOT/RStringView.hxx>
29#include <ROOT/TypeTraits.hxx>
30#include <TError.h> // gErrorIgnoreLevel
31#include <TH1.h>
32#include <TROOT.h> // IsImplicitMTEnabled
33
34#include <deque>
35#include <functional>
36#include <map>
37#include <memory>
38#include <string>
39#include <type_traits>
40#include <typeinfo>
41#include <vector>
42#include <unordered_map>
43
44class TObjArray;
45class TTree;
46namespace ROOT {
47namespace Detail {
48namespace RDF {
49class RNodeBase;
50}
51}
52namespace RDF {
53template <typename T>
54class RResultPtr;
55template<typename T, typename V>
56class RInterface;
58class RDataSource;
59} // namespace RDF
60
61} // namespace ROOT
62
63/// \cond HIDDEN_SYMBOLS
64
65namespace ROOT {
66namespace Internal {
67namespace RDF {
68using namespace ROOT::Detail::RDF;
69using namespace ROOT::RDF;
70namespace TTraits = ROOT::TypeTraits;
71
73
74std::string DemangleTypeIdName(const std::type_info &typeInfo);
75
77ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName);
78
79/// An helper object that sets and resets gErrorIgnoreLevel via RAII.
80class RIgnoreErrorLevelRAII {
81private:
82 int fCurIgnoreErrorLevel = gErrorIgnoreLevel;
83
84public:
85 RIgnoreErrorLevelRAII(int errorIgnoreLevel) { gErrorIgnoreLevel = errorIgnoreLevel; }
86 ~RIgnoreErrorLevelRAII() { gErrorIgnoreLevel = fCurIgnoreErrorLevel; }
87};
88
89/****** BuildAction overloads *******/
90
// clang-format off
/// Empty tag types used to select a BuildAction overload via tag dispatching in RInterface.
namespace ActionTags {
// histogram-like fills
struct Histo1D {};
struct Histo2D {};
struct Histo3D {};
struct HistoND {};
struct Graph {};
struct Profile1D {};
struct Profile2D {};
// reductions
struct Min {};
struct Max {};
struct Sum {};
struct Mean {};
struct Fill {};
struct StdDev {};
// other actions
struct Display {};
struct Snapshot {};
struct Book {};
} // namespace ActionTags
// clang-format on
112
113template <typename T, bool ISV6HISTO = std::is_base_of<TH1, std::decay_t<T>>::value>
114struct HistoUtils {
115 static void SetCanExtendAllAxes(T &h) { h.SetCanExtend(::TH1::kAllAxes); }
116 static bool HasAxisLimits(T &h)
117 {
118 auto xaxis = h.GetXaxis();
119 return !(xaxis->GetXmin() == 0. && xaxis->GetXmax() == 0.);
120 }
121};
122
123template <typename T>
124struct HistoUtils<T, false> {
125 static void SetCanExtendAllAxes(T &) {}
126 static bool HasAxisLimits(T &) { return true; }
127};
128
129// Generic filling (covers Histo2D, Histo3D, HistoND, Profile1D and Profile2D actions, with and without weights)
130template <typename... ColTypes, typename ActionTag, typename ActionResultType, typename PrevNodeType>
131std::unique_ptr<RActionBase>
132BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &h, const unsigned int nSlots,
133 std::shared_ptr<PrevNodeType> prevNode, ActionTag, const RColumnRegister &colRegister)
134{
135 using Helper_t = FillParHelper<ActionResultType>;
136 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
137 return std::make_unique<Action_t>(Helper_t(h, nSlots), bl, std::move(prevNode), colRegister);
138}
139
140// Histo1D filling (must handle the special case of distinguishing FillParHelper and FillHelper
141template <typename... ColTypes, typename PrevNodeType>
142std::unique_ptr<RActionBase>
143BuildAction(const ColumnNames_t &bl, const std::shared_ptr<::TH1D> &h, const unsigned int nSlots,
144 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Histo1D, const RColumnRegister &colRegister)
145{
146 auto hasAxisLimits = HistoUtils<::TH1D>::HasAxisLimits(*h);
147
148 if (hasAxisLimits) {
149 using Helper_t = FillParHelper<::TH1D>;
150 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
151 return std::make_unique<Action_t>(Helper_t(h, nSlots), bl, std::move(prevNode), colRegister);
152 } else {
153 using Helper_t = FillHelper;
154 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
155 return std::make_unique<Action_t>(Helper_t(h, nSlots), bl, std::move(prevNode), colRegister);
156 }
157}
158
159template <typename... ColTypes, typename PrevNodeType>
160std::unique_ptr<RActionBase>
161BuildAction(const ColumnNames_t &bl, const std::shared_ptr<TGraph> &g, const unsigned int nSlots,
162 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Graph, const RColumnRegister &colRegister)
163{
164 using Helper_t = FillTGraphHelper;
165 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
166 return std::make_unique<Action_t>(Helper_t(g, nSlots), bl, std::move(prevNode), colRegister);
167}
168
169// Min action
170template <typename ColType, typename PrevNodeType, typename ActionResultType>
171std::unique_ptr<RActionBase>
172BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &minV, const unsigned int nSlots,
173 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Min, const RColumnRegister &colRegister)
174{
175 using Helper_t = MinHelper<ActionResultType>;
176 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>;
177 return std::make_unique<Action_t>(Helper_t(minV, nSlots), bl, std::move(prevNode), colRegister);
178}
179
180// Max action
181template <typename ColType, typename PrevNodeType, typename ActionResultType>
182std::unique_ptr<RActionBase>
183BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &maxV, const unsigned int nSlots,
184 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Max, const RColumnRegister &colRegister)
185{
186 using Helper_t = MaxHelper<ActionResultType>;
187 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>;
188 return std::make_unique<Action_t>(Helper_t(maxV, nSlots), bl, std::move(prevNode), colRegister);
189}
190
191// Sum action
192template <typename ColType, typename PrevNodeType, typename ActionResultType>
193std::unique_ptr<RActionBase>
194BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &sumV, const unsigned int nSlots,
195 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Sum, const RColumnRegister &colRegister)
196{
197 using Helper_t = SumHelper<ActionResultType>;
198 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>;
199 return std::make_unique<Action_t>(Helper_t(sumV, nSlots), bl, std::move(prevNode), colRegister);
200}
201
202// Mean action
203template <typename ColType, typename PrevNodeType>
204std::unique_ptr<RActionBase>
205BuildAction(const ColumnNames_t &bl, const std::shared_ptr<double> &meanV, const unsigned int nSlots,
206 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Mean, const RColumnRegister &colRegister)
207{
208 using Helper_t = MeanHelper;
209 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>;
210 return std::make_unique<Action_t>(Helper_t(meanV, nSlots), bl, std::move(prevNode), colRegister);
211}
212
213// Standard Deviation action
214template <typename ColType, typename PrevNodeType>
215std::unique_ptr<RActionBase>
216BuildAction(const ColumnNames_t &bl, const std::shared_ptr<double> &stdDeviationV, const unsigned int nSlots,
217 std::shared_ptr<PrevNodeType> prevNode, ActionTags::StdDev, const RColumnRegister &colRegister)
218{
219 using Helper_t = StdDevHelper;
220 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>;
221 return std::make_unique<Action_t>(Helper_t(stdDeviationV, nSlots), bl, prevNode, colRegister);
222}
223
224using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<ROOT::RDF::RDisplay>>;
225
226// Display action
227template <typename... ColTypes, typename PrevNodeType>
228std::unique_ptr<RActionBase>
229BuildAction(const ColumnNames_t &bl, const std::shared_ptr<displayHelperArgs_t> &helperArgs, const unsigned int,
230 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Display,
231 const RDFInternal::RColumnRegister &colRegister)
232{
233 using Helper_t = DisplayHelper<PrevNodeType>;
234 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
235 return std::make_unique<Action_t>(Helper_t(helperArgs->first, helperArgs->second, prevNode), bl, prevNode,
236 colRegister);
237}
238
239struct SnapshotHelperArgs {
240 std::string fFileName;
241 std::string fDirName;
242 std::string fTreeName;
243 std::vector<std::string> fOutputColNames;
245};
246
247// Snapshot action
248template <typename... ColTypes, typename PrevNodeType>
249std::unique_ptr<RActionBase>
250BuildAction(const ColumnNames_t &colNames, const std::shared_ptr<SnapshotHelperArgs> &snapHelperArgs,
251 const unsigned int nSlots, std::shared_ptr<PrevNodeType> prevNode, ActionTags::Snapshot,
252 const RColumnRegister &colRegister)
253{
254 const auto &filename = snapHelperArgs->fFileName;
255 const auto &dirname = snapHelperArgs->fDirName;
256 const auto &treename = snapHelperArgs->fTreeName;
257 const auto &outputColNames = snapHelperArgs->fOutputColNames;
258 const auto &options = snapHelperArgs->fOptions;
259
260 auto makeIsDefine = [&] {
261 std::vector<bool> isDef;
262 isDef.reserve(sizeof...(ColTypes));
263 for (auto i = 0u; i < sizeof...(ColTypes); ++i)
264 isDef[i] = colRegister.HasName(colNames[i]);
265 return isDef;
266 };
267 std::vector<bool> isDefine = makeIsDefine();
268
269 std::unique_ptr<RActionBase> actionPtr;
271 // single-thread snapshot
272 using Helper_t = SnapshotHelper<ColTypes...>;
273 using Action_t = RAction<Helper_t, PrevNodeType>;
274 actionPtr.reset(
275 new Action_t(Helper_t(filename, dirname, treename, colNames, outputColNames, options, std::move(isDefine)),
276 colNames, prevNode, colRegister));
277 } else {
278 // multi-thread snapshot
279 using Helper_t = SnapshotHelperMT<ColTypes...>;
280 using Action_t = RAction<Helper_t, PrevNodeType>;
281 actionPtr.reset(new Action_t(
282 Helper_t(nSlots, filename, dirname, treename, colNames, outputColNames, options, std::move(isDefine)),
283 colNames, prevNode, colRegister));
284 }
285 return actionPtr;
286}
287
288// Book with custom helper type
289template <typename... ColTypes, typename PrevNodeType, typename Helper_t>
290std::unique_ptr<RActionBase>
291BuildAction(const ColumnNames_t &bl, const std::shared_ptr<Helper_t> &h, const unsigned int /*nSlots*/,
292 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Book, const RColumnRegister &colRegister)
293{
294 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
295 return std::make_unique<Action_t>(Helper_t(std::move(*h)), bl, std::move(prevNode), colRegister);
296}
297
298/****** end BuildAndBook ******/
299
300template <typename Filter>
301void CheckFilter(Filter &)
302{
303 using FilterRet_t = typename RDF::CallableTraits<Filter>::ret_type;
304 static_assert(std::is_convertible<FilterRet_t, bool>::value,
305 "filter expression returns a type that is not convertible to bool");
306}
307
308ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action);
309
310void CheckValidCppVarName(std::string_view var, const std::string &where);
311
312void CheckForRedefinition(const std::string &where, std::string_view definedCol, const RColumnRegister &customCols,
313 const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns);
314
315void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols,
316 const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns);
317
318void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols);
319
320std::string PrettyPrintAddr(const void *const addr);
321
322std::shared_ptr<RJittedFilter> BookFilterJit(std::shared_ptr<RNodeBase> *prevNodeOnHeap, std::string_view name,
323 std::string_view expression, const ColumnNames_t &branches,
324 const RColumnRegister &customCols, TTree *tree, RDataSource *ds);
325
326std::shared_ptr<RJittedDefine> BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm,
327 RDataSource *ds, const RColumnRegister &customCols,
328 const ColumnNames_t &branches, std::shared_ptr<RNodeBase> *prevNodeOnHeap);
329
330std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std::string_view expression,
331 RLoopManager &lm, const RColumnRegister &customCols,
332 std::shared_ptr<RNodeBase> *upcastNodeOnHeap);
333
334std::shared_ptr<RJittedVariation>
335BookVariationJit(const std::vector<std::string> &colNames, std::string_view variationName,
336 const std::vector<std::string> &variationTags, std::string_view expression, RLoopManager &lm,
337 RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches,
338 std::shared_ptr<RNodeBase> *upcastNodeOnHeap);
339
340std::string JitBuildAction(const ColumnNames_t &bl, std::shared_ptr<RDFDetail::RNodeBase> *prevNode,
341 const std::type_info &art, const std::type_info &at, void *rOnHeap, TTree *tree,
342 const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds,
343 std::weak_ptr<RJittedAction> *jittedActionOnHeap);
344
// Allocate a weak_ptr on the heap and return a pointer to it. The caller owns the returned object and
// must eventually delete it.
// Meant for RInterface methods that book code for jitting: the lazily jitted code embeds the addresses
// of certain objects, and must be able to check whether they are still alive when it finally runs.
// The jitted code therefore receives the address of a heap-allocated weak_ptr and is responsible for
// deleting it.
template <typename T>
std::weak_ptr<T> *MakeWeakOnHeap(const std::shared_ptr<T> &shPtr)
{
   auto *weakOnHeap = new std::weak_ptr<T>(shPtr);
   return weakOnHeap;
}
356
// Like MakeWeakOnHeap, but the heap-allocated object is a shared_ptr copy, so the pointee is kept
// alive until the returned shared_ptr is deleted by the caller.
template <typename T>
std::shared_ptr<T> *MakeSharedOnHeap(const std::shared_ptr<T> &shPtr)
{
   auto *sharedOnHeap = new std::shared_ptr<T>(shPtr);
   return sharedOnHeap;
}
363
364bool AtLeastOneEmptyString(const std::vector<std::string_view> strings);
365
366/// Take a shared_ptr<AnyNodeType> and return a shared_ptr<RNodeBase>.
367/// This works for RLoopManager nodes as well as filters and ranges.
368std::shared_ptr<RNodeBase> UpcastNode(std::shared_ptr<RNodeBase> ptr);
369
370ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns,
371 const RColumnRegister &validDefines, RDataSource *ds);
372
373std::vector<std::string> GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister,
374 TTree *tree, RDataSource *ds, const std::string &context,
375 bool vector2rvec);
376
377std::vector<bool> FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedDSCols);
378
379template <typename T>
380void AddDSColumnsHelper(const std::string &colName, RLoopManager &lm, RDataSource &ds, RColumnRegister &colRegister)
381{
382 if (colRegister.HasName(colName) || !ds.HasColumn(colName) || lm.HasDSValuePtrs(colName))
383 return;
384
385 const auto valuePtrs = ds.GetColumnReaders<T>(colName);
386 if (!valuePtrs.empty()) {
387 // we are using the old GetColumnReaders mechanism
388 std::vector<void*> typeErasedValuePtrs(valuePtrs.begin(), valuePtrs.end());
389 lm.AddDSValuePtrs(colName, std::move(typeErasedValuePtrs));
390 }
391}
392
393/// Take list of column names that must be defined, current map of custom columns, current list of defined column names,
394/// and return a new map of custom columns (with the new datasource columns added to it)
395template <typename... ColumnTypes>
396void AddDSColumns(const std::vector<std::string> &requiredCols, RLoopManager &lm, RDataSource &ds,
397 TTraits::TypeList<ColumnTypes...>, RColumnRegister &colRegister)
398{
399 // hack to expand a template parameter pack without c++17 fold expressions.
400 using expander = int[];
401 int i = 0;
402 (void)expander{(AddDSColumnsHelper<ColumnTypes>(requiredCols[i], lm, ds, colRegister), ++i)..., 0};
403}
404
405// this function is meant to be called by the jitted code generated by BookFilterJit
406template <typename F, typename PrevNode>
407void JitFilterHelper(F &&f, const char **colsPtr, std::size_t colsSize, std::string_view name,
408 std::weak_ptr<RJittedFilter> *wkJittedFilter, std::shared_ptr<PrevNode> *prevNodeOnHeap,
409 RColumnRegister *colRegister) noexcept
410{
411 if (wkJittedFilter->expired()) {
412 // The branch of the computation graph that needed this jitted code went out of scope between the type
413 // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up.
414 delete wkJittedFilter;
415 // colRegister must be deleted before prevNodeOnHeap because their dtor needs the RLoopManager to be alive
416 // and prevNodeOnHeap is what keeps it alive if the rest of the computation graph is already out of scope
417 delete colRegister;
418 delete prevNodeOnHeap;
419 return;
420 }
421
422 const ColumnNames_t cols(colsPtr, colsPtr + colsSize);
423 delete[] colsPtr;
424
425 const auto jittedFilter = wkJittedFilter->lock();
426
427 // mock Filter logic -- validity checks and Define-ition of RDataSource columns
428 using Callable_t = std::decay_t<F>;
430 using ColTypes_t = typename TTraits::CallableTraits<Callable_t>::arg_types;
431 constexpr auto nColumns = ColTypes_t::list_size;
432 CheckFilter(f);
433
434 auto &lm = *jittedFilter->GetLoopManagerUnchecked(); // RLoopManager must exist at this time
435 auto ds = lm.GetDataSource();
436
437 if (ds != nullptr)
438 AddDSColumns(cols, lm, *ds, ColTypes_t(), *colRegister);
439
440 jittedFilter->SetFilter(
441 std::unique_ptr<RFilterBase>(new F_t(std::forward<F>(f), cols, *prevNodeOnHeap, *colRegister, name)));
442 // colRegister points to the columns structure in the heap, created before the jitted call so that the jitter can
443 // share data after it has lazily compiled the code. Here the data has been used and the memory can be freed.
444 delete colRegister;
445 delete prevNodeOnHeap;
446 delete wkJittedFilter;
447}
448
/// Empty tag types selecting which MakeDefineNode overload is called.
namespace DefineTypes {
/// Selects the overload that builds an RDefine.
struct RDefineTag {
};
/// Selects the overload that builds an RDefinePerSample.
struct RDefinePerSampleTag {
};
} // namespace DefineTypes
453
454template <typename F>
455auto MakeDefineNode(DefineTypes::RDefineTag, std::string_view name, std::string_view dummyType, F &&f,
456 const ColumnNames_t &cols, RColumnRegister &colRegister, RLoopManager &lm)
457{
458 return std::unique_ptr<RDefineBase>(new RDefine<std::decay_t<F>, CustomColExtraArgs::None>(
459 name, dummyType, std::forward<F>(f), cols, colRegister, lm));
460}
461
462template <typename F>
463auto MakeDefineNode(DefineTypes::RDefinePerSampleTag, std::string_view name, std::string_view dummyType, F &&f,
464 const ColumnNames_t &, RColumnRegister &, RLoopManager &lm)
465{
466 return std::unique_ptr<RDefineBase>(
467 new RDefinePerSample<std::decay_t<F>>(name, dummyType, std::forward<F>(f), lm));
468}
469
470// Build a RDefine or a RDefinePerSample object and attach it to an existing RJittedDefine
471// This function is meant to be called by jitted code right before starting the event loop.
472// If colsPtr is null, build a RDefinePerSample (it has no input columns), otherwise a RDefine.
473template <typename RDefineTypeTag, typename F>
474void JitDefineHelper(F &&f, const char **colsPtr, std::size_t colsSize, std::string_view name, RLoopManager *lm,
475 std::weak_ptr<RJittedDefine> *wkJittedDefine, RColumnRegister *colRegister,
476 std::shared_ptr<RNodeBase> *prevNodeOnHeap) noexcept
477{
478 // a helper to delete objects allocated before jitting, so that the jitter can share data with lazily jitted code
479 auto doDeletes = [&] {
480 delete wkJittedDefine;
481 // colRegister must be deleted before prevNodeOnHeap because their dtor needs the RLoopManager to be alive
482 // and prevNodeOnHeap is what keeps it alive if the rest of the computation graph is already out of scope
483 delete colRegister;
484 delete prevNodeOnHeap;
485 delete[] colsPtr;
486 };
487
488 if (wkJittedDefine->expired()) {
489 // The branch of the computation graph that needed this jitted code went out of scope between the type
490 // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up.
491 doDeletes();
492 return;
493 }
494
495 const ColumnNames_t cols(colsPtr, colsPtr + colsSize);
496
497 auto jittedDefine = wkJittedDefine->lock();
498
499 using Callable_t = std::decay_t<F>;
500 using ColTypes_t = typename TTraits::CallableTraits<Callable_t>::arg_types;
501
502 auto ds = lm->GetDataSource();
503 if (ds != nullptr)
504 AddDSColumns(cols, *lm, *ds, ColTypes_t(), *colRegister);
505
506 // will never actually be used (trumped by jittedDefine->GetTypeName()), but we set it to something meaningful
507 // to help devs debugging
508 const auto dummyType = "jittedCol_t";
509 // use unique_ptr<RDefineBase> instead of make_unique<NewCol_t> to reduce jit/compile-times
510 std::unique_ptr<RDefineBase> newCol{
511 MakeDefineNode(RDefineTypeTag{}, name, dummyType, std::forward<F>(f), cols, *colRegister, *lm)};
512 jittedDefine->SetDefine(std::move(newCol));
513
514 doDeletes();
515}
516
517template <typename F>
518void JitVariationHelper(F &&f, const char **colsPtr, std::size_t colsSize, const char **variedCols,
519 std::size_t variedColsSize, const char **variationTags, std::size_t variationTagsSize,
520 std::string_view variationName, RLoopManager *lm,
521 std::weak_ptr<RJittedVariation> *wkJittedVariation, RColumnRegister *colRegister,
522 std::shared_ptr<RNodeBase> *prevNodeOnHeap) noexcept
523{
524 // a helper to delete objects allocated before jitting, so that the jitter can share data with lazily jitted code
525 auto doDeletes = [&] {
526 delete[] colsPtr;
527 delete[] variedCols;
528 delete[] variationTags;
529
530 delete wkJittedVariation;
531 // colRegister must be deleted before prevNodeOnHeap because their dtor needs the RLoopManager to be alive
532 // and prevNodeOnHeap is what keeps it alive if the rest of the computation graph is already out of scope
533 delete colRegister;
534 delete prevNodeOnHeap;
535 };
536
537 if (wkJittedVariation->expired()) {
538 // The branch of the computation graph that needed this jitted variation went out of scope between the type
539 // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up.
540 doDeletes();
541 return;
542 }
543
544 const ColumnNames_t inputColNames(colsPtr, colsPtr + colsSize);
545 std::vector<std::string> variedColNames(variedCols, variedCols + variedColsSize);
546 std::vector<std::string> tags(variationTags, variationTags + variationTagsSize);
547
548 auto jittedVariation = wkJittedVariation->lock();
549
550 using Callable_t = std::decay_t<F>;
551 using ColTypes_t = typename TTraits::CallableTraits<Callable_t>::arg_types;
552
553 auto ds = lm->GetDataSource();
554 if (ds != nullptr)
555 AddDSColumns(inputColNames, *lm, *ds, ColTypes_t(), *colRegister);
556
557 // use unique_ptr<RDefineBase> instead of make_unique<NewCol_t> to reduce jit/compile-times
558 std::unique_ptr<RVariationBase> newVariation{
559 new RVariation<std::decay_t<F>>(std::move(variedColNames), variationName, std::forward<F>(f), std::move(tags),
560 jittedVariation->GetTypeName(), *colRegister, *lm, std::move(inputColNames))};
561 jittedVariation->SetVariation(std::move(newVariation));
562
563 doDeletes();
564}
565
566/// Convenience function invoked by jitted code to build action nodes at runtime
567template <typename ActionTag, typename... ColTypes, typename PrevNodeType, typename HelperArgType>
568void CallBuildAction(std::shared_ptr<PrevNodeType> *prevNodeOnHeap, const char **colsPtr, std::size_t colsSize,
569 const unsigned int nSlots, std::shared_ptr<HelperArgType> *helperArgOnHeap,
570 std::weak_ptr<RJittedAction> *wkJittedActionOnHeap, RColumnRegister *colRegister) noexcept
571{
572 // a helper to delete objects allocated before jitting, so that the jitter can share data with lazily jitted code
573 auto doDeletes = [&] {
574 delete[] colsPtr;
575 delete helperArgOnHeap;
576 delete wkJittedActionOnHeap;
577 // colRegister must be deleted before prevNodeOnHeap because their dtor needs the RLoopManager to be alive
578 // and prevNodeOnHeap is what keeps it alive if the rest of the computation graph is already out of scope
579 delete colRegister;
580 delete prevNodeOnHeap;
581 };
582
583 if (wkJittedActionOnHeap->expired()) {
584 // The branch of the computation graph that needed this jitted variation went out of scope between the type
585 // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up.
586 doDeletes();
587 return;
588 }
589
590 const ColumnNames_t cols(colsPtr, colsPtr + colsSize);
591
592 auto jittedActionOnHeap = wkJittedActionOnHeap->lock();
593
594 // if we are here it means we are jitting, if we are jitting the loop manager must be alive
595 auto &prevNodePtr = *prevNodeOnHeap;
596 auto &loopManager = *prevNodePtr->GetLoopManagerUnchecked();
597 using ColTypes_t = TypeList<ColTypes...>;
598 constexpr auto nColumns = ColTypes_t::list_size;
599 auto ds = loopManager.GetDataSource();
600 if (ds != nullptr)
601 AddDSColumns(cols, loopManager, *ds, ColTypes_t(), *colRegister);
602
603 auto actionPtr = BuildAction<ColTypes...>(cols, std::move(*helperArgOnHeap), nSlots, std::move(prevNodePtr),
604 ActionTag{}, *colRegister);
605 jittedActionOnHeap->SetAction(std::move(actionPtr));
606
607 doDeletes();
608}
609
610/// The contained `type` alias is `double` if `T == RInferredType`, `U` if `T == std::container<U>`, `T` otherwise.
611template <typename T, bool Container = IsDataContainer<T>::value && !std::is_same<T, std::string>::value>
612struct RMinReturnType {
613 using type = T;
614};
615
616template <>
617struct RMinReturnType<RInferredType, false> {
618 using type = double;
619};
620
621template <typename T>
622struct RMinReturnType<T, true> {
623 using type = TTraits::TakeFirstParameter_t<T>;
624};
625
626// return wrapper around f that prepends an `unsigned int slot` parameter
627template <typename R, typename F, typename... Args>
628std::function<R(unsigned int, Args...)> AddSlotParameter(F &f, TypeList<Args...>)
629{
630 return [f](unsigned int, Args... a) mutable -> R { return f(a...); };
631}
632
633template <typename ColType, typename... Rest>
634struct RNeedJittingHelper {
635 static constexpr bool value = RNeedJittingHelper<Rest...>::value;
636};
637
638template <typename... Rest>
639struct RNeedJittingHelper<RInferredType, Rest...> {
640 static constexpr bool value = true;
641};
642
643template <typename T>
644struct RNeedJittingHelper<T> {
645 static constexpr bool value = false;
646};
647
648template <>
649struct RNeedJittingHelper<RInferredType> {
650 static constexpr bool value = true;
651};
652
653template <typename ...ColTypes>
654struct RNeedJitting {
655 static constexpr bool value = RNeedJittingHelper<ColTypes...>::value;
656};
657
658template <>
659struct RNeedJitting<> {
660 static constexpr bool value = false;
661};
662
663///////////////////////////////////////////////////////////////////////////////
664/// Check preconditions for RInterface::Aggregate:
665/// - the aggregator callable must have signature `U(U,T)` or `void(U&,T)`.
666/// - the merge callable must have signature `U(U,U)` or `void(std::vector<U>&)`
667template <typename R, typename Merge, typename U, typename T, typename decayedU = std::decay_t<U>,
668 typename mergeArgsNoDecay_t = typename CallableTraits<Merge>::arg_types_nodecay,
669 typename mergeArgs_t = typename CallableTraits<Merge>::arg_types,
670 typename mergeRet_t = typename CallableTraits<Merge>::ret_type>
671void CheckAggregate(TypeList<U, T>)
672{
673 constexpr bool isAggregatorOk =
674 (std::is_same<R, decayedU>::value) || (std::is_same<R, void>::value && std::is_lvalue_reference<U>::value);
675 static_assert(isAggregatorOk, "aggregator function must have signature `U(U,T)` or `void(U&,T)`");
676 constexpr bool isMergeOk =
677 (std::is_same<TypeList<decayedU, decayedU>, mergeArgs_t>::value && std::is_same<decayedU, mergeRet_t>::value) ||
678 (std::is_same<TypeList<std::vector<decayedU> &>, mergeArgsNoDecay_t>::value &&
679 std::is_same<void, mergeRet_t>::value);
680 static_assert(isMergeOk, "merge function must have signature `U(U,U)` or `void(std::vector<U>&)`");
681}
682
///////////////////////////////////////////////////////////////////////////////
/// Fallback overload of CheckAggregate, selected when the argument is not a two-element TypeList.
/// Instantiating it always fails the static_assert below.
template <typename R, typename T>
void CheckAggregate(T)
{
   // the condition depends on T so that it is only evaluated on instantiation
   constexpr bool alwaysFalse = sizeof(T) == 0;
   static_assert(alwaysFalse, "aggregator function must take exactly two arguments");
}
690
691///////////////////////////////////////////////////////////////////////////////
692/// Check as many template parameters were passed as the number of column names, throw if this is not the case.
693void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames);
694
695/// Return local BranchNames or default BranchNames according to which one should be used
696const ColumnNames_t SelectColumns(unsigned int nArgs, const ColumnNames_t &bl, const ColumnNames_t &defBl);
697
698/// Check whether column names refer to a valid branch of a TTree or have been `Define`d. Return invalid column names.
699ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns,
700 const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns);
701
702/// Returns the list of Filters defined in the whole graph
703std::vector<std::string> GetFilterNames(const std::shared_ptr<RLoopManager> &loopManager);
704
/// Returns the list of Filters defined in the branch.
/// The names are collected by calling `AddFilterName` on `node`.
template <typename NodeType>
std::vector<std::string> GetFilterNames(const std::shared_ptr<NodeType> &node)
{
   std::vector<std::string> names;
   node->AddFilterName(names);
   return names;
}
713
/// Result type of ParseTreePath: a tree name and a directory name.
struct ParsedTreePath {
   std::string fTreeName; ///< tree name component
   std::string fDirName;  ///< directory name component
};
718
719ParsedTreePath ParseTreePath(std::string_view fullTreeName);
720
// Check if a condition is true for all types.
// TBoolPack only carries a pack of bools in its type; it is never defined.
template <bool...>
struct TBoolPack;

// Appending `true` to the pack gives the same type as prepending `true` only if every element is `true`.
template <bool... Flags>
using IsTrueForAllImpl_t = std::is_same<TBoolPack<Flags..., true>, TBoolPack<true, Flags...>>;

// `value` is the logical AND of all `Conditions` (true for an empty pack).
template <bool... Conditions>
struct TEvalAnd {
   static constexpr bool value = IsTrueForAllImpl_t<Conditions...>::value;
};
732
// Check if a class is a specialisation of stl containers templates
// clang-format off

/// True only for `std::list<T>` (single explicit template argument), false for any other type.
template <typename AnyType>
struct IsList_t : std::false_type {};

template <typename Elem>
struct IsList_t<std::list<Elem>> : std::true_type {};

/// True only for `std::deque<T>` (single explicit template argument), false for any other type.
template <typename AnyType>
struct IsDeque_t : std::false_type {};

template <typename Elem>
struct IsDeque_t<std::deque<Elem>> : std::true_type {};
// clang-format on
748
750
751void TriggerRun(ROOT::RDF::RNode &node);
752
753template <typename T>
754struct InnerValueType {
755 using type = T; // fallback for when T is not a nested RVec
756};
757
758template <typename Elem>
759struct InnerValueType<ROOT::VecOps::RVec<ROOT::VecOps::RVec<Elem>>> {
760 using type = Elem;
761};
762
763template <typename T>
764using InnerValueType_t = typename InnerValueType<T>::type;
765
766std::pair<std::vector<std::string>, std::vector<std::string>>
767AddSizeBranches(const std::vector<std::string> &branches, TTree *tree, std::vector<std::string> &&colsWithoutAliases,
768 std::vector<std::string> &&colsWithAliases);
769
770void RemoveDuplicates(ColumnNames_t &columnNames);
771
772} // namespace RDF
773} // namespace Internal
774
775namespace Detail {
776namespace RDF {
777
778/// The aliased type is `double` if `T == RInferredType`, `U` if `T == container<U>`, `T` otherwise.
779template <typename T>
780using MinReturnType_t = typename RDFInternal::RMinReturnType<T>::type;
781
782template <typename T>
783using MaxReturnType_t = MinReturnType_t<T>;
784
785template <typename T>
786using SumReturnType_t = MinReturnType_t<T>;
787
788} // namespace RDF
789} // namespace Detail
790} // namespace ROOT
791
792/// \endcond
793
794#endif
double
typedef void(GLAPIENTRYP _GLUfuncptr)(void)
#define f(i)
Definition RSha256.hxx:104
#define g(i)
Definition RSha256.hxx:105
#define a(i)
Definition RSha256.hxx:99
#define h(i)
Definition RSha256.hxx:106
R__EXTERN Int_t gErrorIgnoreLevel
Definition TError.h:127
char name[80]
Definition TGX11.cxx:110
int type
Definition TGX11.cxx:121
The head node of a RDF computation graph.
void AddDSValuePtrs(const std::string &col, const std::vector< void * > ptrs)
RDataSource * GetDataSource() const
bool HasDSValuePtrs(const std::string &col) const
RLoopManager * GetLoopManagerUnchecked() final
A binder for user-defined columns and aliases.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual bool HasColumn(std::string_view colName) const =0
Checks if the dataset has a certain column.
std::vector< T ** > GetColumnReaders(std::string_view columnName)
Called at most once per column by RDF.
The public interface to the RDataFrame federation of classes.
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1455
@ kAllAxes
Definition TH1.h:75
An array of TObjects.
Definition TObjArray.h:31
A TTree represents a columnar dataset.
Definition TTree.h:79
R Sum(const RVec< T > &v, const R zero=R(0))
Sum elements of an RVec.
Definition RVec.hxx:1877
#define F(x, y, z)
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Vary call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
void RemoveDuplicates(ColumnNames_t &columnNames)
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &customColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
ColumnNames_t GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
std::string DemangleTypeIdName(const std::type_info &typeInfo)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &customCols, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(const std::vector< std::string > &branches, TTree *tree, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::vector< bool > FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedCols)
Return a bitset each element of which indicates whether the corresponding element in selectedColumns ...
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols)
Throw if the column has systematic variations attached.
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
void TriggerRun(ROOT::RDF::RNode &node)
Trigger the execution of an RDataFrame computation graph.
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &customCols, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
double T(double x)
std::vector< std::string > ColumnNames_t
Definition Utils.hxx:35
ROOT type_traits extensions.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:558
Short_t Max(Short_t a, Short_t b)
Definition TMathBase.h:208
Double_t Mean(Long64_t n, const T *a, const Double_t *w=0)
Return the weighted mean of an array a with length n.
Definition TMath.h:1023
Short_t Min(Short_t a, Short_t b)
Definition TMathBase.h:176
Double_t StdDev(Long64_t n, const T *a, const Double_t *w=0)
Definition TMath.h:530
Definition tree.py:1
A collection of options to steer the creation of the dataset on file.
Lightweight storage for a collection of types.