Logo ROOT   6.18/05
Reference Guide
RLoopManager.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RLOOPMANAGER
12#define ROOT_RLOOPMANAGER
13
16
17#include <functional>
18#include <map>
19#include <memory>
20#include <string>
21#include <vector>
22
23// forward declarations
24class TTreeReader;
25
26namespace ROOT {
27namespace RDF {
28class RCutFlowReport;
29class RDataSource;
30} // ns RDF
31
32namespace Internal {
33namespace RDF {
34ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates = true);
35
36class RActionBase;
37class GraphNode;
38
39namespace GraphDrawing {
41} // ns GraphDrawing
42} // ns RDF
43} // ns Internal
44
45namespace Detail {
46namespace RDF {
47using namespace ROOT::TypeTraits;
49
50class RCustomColumnBase;
51class RFilterBase;
52class RRangeBase;
53
54/// The head node of an RDF computation graph.
55/// This class is responsible for running the event loop.
56class RLoopManager : public RNodeBase {
58 enum class ELoopType { kROOTFiles, kROOTFilesMT, kNoFiles, kNoFilesMT, kDataSource, kDataSourceMT };
59 using Callback_t = std::function<void(unsigned int)>;
60 class TCallback {
63 std::vector<ULong64_t> fCounters;
64
65 public:
66 TCallback(ULong64_t everyN, Callback_t &&f, unsigned int nSlots)
67 : fFun(std::move(f)), fEveryN(everyN), fCounters(nSlots, 0ull)
68 {
69 }
70
71 void operator()(unsigned int slot)
72 {
73 auto &c = fCounters[slot];
74 ++c;
75 if (c == fEveryN) {
76 c = 0ull;
77 fFun(slot);
78 }
79 }
80 };
81
84 std::vector<int> fHasBeenCalled; // std::vector<bool> is thread-unsafe for our purposes (and generally evil)
85
86 public:
87 TOneTimeCallback(Callback_t &&f, unsigned int nSlots) : fFun(std::move(f)), fHasBeenCalled(nSlots, 0) {}
88
89 void operator()(unsigned int slot)
90 {
91 if (fHasBeenCalled[slot] == 1)
92 return;
93 fFun(slot);
94 fHasBeenCalled[slot] = 1;
95 }
96 };
97
98 std::vector<RDFInternal::RActionBase *> fBookedActions; ///< Non-owning pointers to actions to be run
99 std::vector<RDFInternal::RActionBase *> fRunActions; ///< Non-owning pointers to actions already run
100 std::vector<RFilterBase *> fBookedFilters;
101 std::vector<RFilterBase *> fBookedNamedFilters; ///< Contains a subset of fBookedFilters, i.e. only the named filters
102 std::vector<RRangeBase *> fBookedRanges;
103
104 /// Shared pointer to the input TTree. It does not delete the pointee if the TTree/TChain was passed directly as an
105 /// argument to RDataFrame's ctor (in which case we let users retain ownership).
106 std::shared_ptr<TTree> fTree{nullptr};
109 const unsigned int fNSlots{1};
111 const ELoopType fLoopType; ///< The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
112 std::string fToJitDeclare; ///< Code that should be just-in-time declared right before the event loop
113 std::string fToJitExec; ///< Code that should be just-in-time executed right before the event loop
114 const std::unique_ptr<RDataSource> fDataSource; ///< Owning pointer to a data-source object. Null if no data-source
115 std::map<std::string, std::string> fAliasColumnNameMap; ///< ColumnNameAlias-columnName pairs
116 std::vector<TCallback> fCallbacks; ///< Registered callbacks
117 std::vector<TOneTimeCallback> fCallbacksOnce; ///< Registered callbacks to invoke just once before running the loop
118 /// A unique ID that identifies the computation graph that starts with this RLoopManager.
119 /// Used, for example, to jit objects in a namespace reserved for this computation graph
120 const unsigned int fID = GetNextID();
121
122 std::vector<RCustomColumnBase *> fCustomColumns; ///< Non-owning container of all custom columns created so far.
123 /// Cache of the tree/chain branch names. Never access directly, always use GetBranchNames().
125
126 void CheckIndexedFriends();
127 void RunEmptySourceMT();
128 void RunEmptySource();
129 void RunTreeProcessorMT();
130 void RunTreeReader();
131 void RunDataSourceMT();
132 void RunDataSource();
133 void RunAndCheckFilters(unsigned int slot, Long64_t entry);
134 void InitNodeSlots(TTreeReader *r, unsigned int slot);
135 void InitNodes();
136 void CleanUpNodes();
137 void CleanUpTask(unsigned int slot);
138 void EvalChildrenCounts();
139 static unsigned int GetNextID();
140
141public:
142 RLoopManager(TTree *tree, const ColumnNames_t &defaultBranches);
143 RLoopManager(ULong64_t nEmptyEntries);
144 RLoopManager(std::unique_ptr<RDataSource> ds, const ColumnNames_t &defaultBranches);
145 RLoopManager(const RLoopManager &) = delete;
147
148 void JitDeclarations();
149 void Jit();
150 RLoopManager *GetLoopManagerUnchecked() final { return this; }
151 void Run();
153 TTree *GetTree() const;
156 RDataSource *GetDataSource() const { return fDataSource.get(); }
157 void Book(RDFInternal::RActionBase *actionPtr);
158 void Deregister(RDFInternal::RActionBase *actionPtr);
159 void Book(RFilterBase *filterPtr);
160 void Deregister(RFilterBase *filterPtr);
161 void Book(RRangeBase *rangePtr);
162 void Deregister(RRangeBase *rangePtr);
163 bool CheckFilters(unsigned int, Long64_t) final;
164 unsigned int GetNSlots() const { return fNSlots; }
165 void Report(ROOT::RDF::RCutFlowReport &rep) const final;
166 /// End of recursive chain of calls, does nothing
168 void SetTree(const std::shared_ptr<TTree> &tree) { fTree = tree; }
169 void IncrChildrenCount() final { ++fNChildren; }
170 void StopProcessing() final { ++fNStopsReceived; }
171 void ToJitDeclare(const std::string &s) { fToJitDeclare.append(s); }
172 void ToJitExec(const std::string &s) { fToJitExec.append(s); }
173 void AddColumnAlias(const std::string &alias, const std::string &colName) { fAliasColumnNameMap[alias] = colName; }
174 const std::map<std::string, std::string> &GetAliasMap() const { return fAliasColumnNameMap; }
175 void RegisterCallback(ULong64_t everyNEvents, std::function<void(unsigned int)> &&f);
176 unsigned int GetID() const { return fID; }
177
178 /// End of recursive chain of calls, does nothing
179 void AddFilterName(std::vector<std::string> &) {}
180 /// For each booked filter, returns either the name or "Unnamed Filter"
181 std::vector<std::string> GetFiltersNames();
182
183 /// For all the actions, either booked or run
184 std::vector<RDFInternal::RActionBase *> GetAllActions();
185
186 void RegisterCustomColumn(RCustomColumnBase *column) { fCustomColumns.push_back(column); }
187
189 {
190 fCustomColumns.erase(std::remove(fCustomColumns.begin(), fCustomColumns.end(), column), fCustomColumns.end());
191 }
192
193 std::vector<RDFInternal::RActionBase *> GetBookedActions() { return fBookedActions; }
194 std::shared_ptr<ROOT::Internal::RDF::GraphDrawing::GraphNode> GetGraph();
195
197};
198
199} // ns RDF
200} // ns Detail
201} // ns ROOT
202
203#endif
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
long long Long64_t
Definition: RtypesCore.h:69
unsigned long long ULong64_t
Definition: RtypesCore.h:70
typedef void((*Func_t)())
TCallback(ULong64_t everyN, Callback_t &&f, unsigned int nSlots)
TOneTimeCallback(Callback_t &&f, unsigned int nSlots)
The head node of an RDF computation graph.
RLoopManager(TTree *tree, const ColumnNames_t &defaultBranches)
const unsigned int fID
A unique ID that identifies the computation graph that starts with this RLoopManager.
bool CheckFilters(unsigned int, Long64_t) final
void EvalChildrenCounts()
Trigger counting of number of children nodes for each node of the functional graph.
void CleanUpNodes()
Perform clean-up operations. To be called at the end of each event loop.
void RunEmptySource()
Run event loop with no source files, in sequence.
const std::map< std::string, std::string > & GetAliasMap() const
std::function< void(unsigned int)> Callback_t
void Report(ROOT::RDF::RCutFlowReport &rep) const final
Call FillReport on all booked filters.
std::vector< RFilterBase * > fBookedNamedFilters
Contains a subset of fBookedFilters, i.e. only the named filters.
void RunEmptySourceMT()
Run event loop with no source files, in parallel.
ULong64_t GetNEmptyEntries() const
void DeRegisterCustomColumn(RCustomColumnBase *column)
RLoopManager & operator=(const RLoopManager &)=delete
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
::TDirectory * GetDirectory() const
std::vector< RDFInternal::RActionBase * > GetBookedActions()
std::shared_ptr< TTree > fTree
Shared pointer to the input TTree.
void RegisterCustomColumn(RCustomColumnBase *column)
std::string fToJitExec
Code that should be just-in-time executed right before the event loop.
void RunTreeReader()
Run event loop over one or multiple ROOT files, in sequence.
std::vector< RDFInternal::RActionBase * > GetAllActions()
For all the actions, either booked or run.
void ToJitDeclare(const std::string &s)
void ToJitExec(const std::string &s)
void CleanUpTask(unsigned int slot)
Perform clean-up operations. To be called at the end of each task execution.
std::vector< RDFInternal::RActionBase * > fRunActions
Non-owning pointers to actions already run.
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void AddFilterName(std::vector< std::string > &)
End of recursive chain of calls, does nothing.
std::vector< RRangeBase * > fBookedRanges
std::map< std::string, std::string > fAliasColumnNameMap
ColumnNameAlias-columnName pairs.
std::vector< TCallback > fCallbacks
Registered callbacks.
void RunAndCheckFilters(unsigned int slot, Long64_t entry)
Execute actions and make sure named filters are called for each event.
std::vector< RFilterBase * > fBookedFilters
std::vector< RDFInternal::RActionBase * > fBookedActions
Non-owning pointers to actions to be run.
void JitDeclarations()
Declare to the interpreter type aliases and other entities required by RDF jitted nodes.
std::shared_ptr< ROOT::Internal::RDF::GraphDrawing::GraphNode > GetGraph()
const ELoopType fLoopType
The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
void AddColumnAlias(const std::string &alias, const std::string &colName)
ColumnNames_t fValidBranchNames
Cache of the tree/chain branch names. Never access directly, always use GetBranchNames().
std::vector< RCustomColumnBase * > fCustomColumns
Non-owning container of all custom columns created so far.
void SetTree(const std::shared_ptr< TTree > &tree)
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the RDataFrame.
RDataSource * GetDataSource() const
unsigned int GetNSlots() const
std::vector< TOneTimeCallback > fCallbacksOnce
Registered callbacks to invoke just once before running the loop.
void RunDataSourceMT()
Run event loop over data accessed through a DataSource, in parallel.
void PartialReport(ROOT::RDF::RCutFlowReport &) const final
End of recursive chain of calls, does nothing.
std::string fToJitDeclare
Code that should be just-in-time declared right before the event loop.
std::vector< std::string > GetFiltersNames()
For each booked filter, returns either the name or "Unnamed Filter".
RLoopManager(const RLoopManager &)=delete
const std::unique_ptr< RDataSource > fDataSource
Owning pointer to a data-source object. Null if no data-source.
static unsigned int GetNextID()
const ColumnNames_t fDefaultColumns
void Book(RDFInternal::RActionBase *actionPtr)
void InitNodeSlots(TTreeReader *r, unsigned int slot)
Build TTreeReaderValues for all nodes. This method loops over all filters, actions and other booked ob...
void RegisterCallback(ULong64_t everyNEvents, std::function< void(unsigned int)> &&f)
void RunDataSource()
Run event loop over data accessed through a DataSource, in sequence.
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
void RunTreeProcessorMT()
Run event loop over one or multiple ROOT files, in parallel.
void Deregister(RDFInternal::RActionBase *actionPtr)
void InitNodes()
Initialize all nodes of the functional graph before running the event loop.
RLoopManager * GetLoopManagerUnchecked() final
Base class for non-leaf nodes of the computational graph.
Definition: RNodeBase.hxx:41
unsigned int fNStopsReceived
Number of times that a child node signaled to stop processing entries.
Definition: RNodeBase.hxx:45
unsigned int fNChildren
Number of nodes of the functional graph hanging from this object.
Definition: RNodeBase.hxx:44
Helper class that provides the operation graph nodes.
Class used to create the operation graph to be printed in the dot representation.
Definition: GraphNode.hxx:26
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
Describe directory structure in memory.
Definition: TDirectory.h:34
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:44
A TTree represents a columnar dataset.
Definition: TTree.h:71
ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:151
ROOT type_traits extensions.
Definition: TypeTraits.hxx:23
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
ROOT::Detail::RDF::ColumnNames_t ColumnNames_t
Definition: RDataFrame.cxx:788
static constexpr double s
Definition: tree.py:1