Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFHelpers.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11// This header contains helper free functions that slim down RDataFrame's programming model
12
13#ifndef ROOT_RDF_HELPERS
14#define ROOT_RDF_HELPERS
15
16#include <ROOT/RDataFrame.hxx>
21#include <ROOT/TypeTraits.hxx>
22
23#include <algorithm> // std::transform
24#include <fstream>
25#include <functional>
26#include <iostream>
27#include <memory>
28#include <type_traits>
29#include <utility> // std::index_sequence
30#include <vector>
31
32namespace ROOT {
33namespace Internal {
34namespace RDF {
35template <typename... ArgTypes, typename F>
37{
38 return std::function<bool(ArgTypes...)>([=](ArgTypes... args) mutable { return !f(args...); });
39}
40
41template <typename... ArgTypes, typename Ret, typename... Args>
42std::function<bool(ArgTypes...)> NotHelper(ROOT::TypeTraits::TypeList<ArgTypes...>, Ret (*f)(Args...))
43{
44 return std::function<bool(ArgTypes...)>([=](ArgTypes... args) mutable { return !f(args...); });
45}
46
/// Primary template: only the std::index_sequence specialization below is defined.
template <typename I, typename T, typename F>
class PassAsVecHelper;

/// Callable adaptor that takes sizeof...(N) arguments of type T and forwards them to the wrapped callable as a
/// single braced-init-list `{args...}`.
template <std::size_t... N, typename T, typename F>
class PassAsVecHelper<std::index_sequence<N...>, T, F> {
   /// Maps every index to T, so that expanding `AlwaysT<N>...` yields sizeof...(N) parameters of type T.
   template <std::size_t Idx>
   using AlwaysT = T;
   /// The wrapped callable, stored by value (decayed).
   std::decay_t<F> fFunc;

public:
   /// Store a copy (or a moved-from instance) of the callable `f`.
   PassAsVecHelper(F &&f) : fFunc(std::forward<F>(f)) {}
   /// Invoke the wrapped callable with `{args...}` and return whatever it returns.
   auto operator()(AlwaysT<N>... args) -> decltype(fFunc({args...})) { return fFunc({args...}); }
};
60
61template <std::size_t N, typename T, typename F>
63{
64 return PassAsVecHelper<std::make_index_sequence<N>, T, F>(std::forward<F>(f));
65}
66
67} // namespace RDF
68} // namespace Internal
69
70namespace RDF {
72
73
74// clag-format off
75/// Given a callable with signature bool(T1, T2, ...) return a callable with same signature that returns the negated result
76///
77/// The callable must have one single non-template definition of operator(). This is a limitation with respect to
78/// std::not_fn, required for interoperability with RDataFrame.
79// clang-format on
80template <typename F,
81 typename Args = typename ROOT::TypeTraits::CallableTraits<std::decay_t<F>>::arg_types_nodecay,
82 typename Ret = typename ROOT::TypeTraits::CallableTraits<std::decay_t<F>>::ret_type>
83auto Not(F &&f) -> decltype(RDFInternal::NotHelper(Args(), std::forward<F>(f)))
84{
85 static_assert(std::is_same<Ret, bool>::value, "RDF::Not requires a callable that returns a bool.");
86 return RDFInternal::NotHelper(Args(), std::forward<F>(f));
87}
88
89// clang-format off
90/// PassAsVec is a callable generator that allows passing N variables of type T to a function as a single collection.
91///
92/// PassAsVec<N, T>(func) returns a callable that takes N arguments of type T, passes them down to function `func` as
93/// an initializer list `{t1, t2, t3,..., tN}` and returns whatever `func({t1, t2, t3, ..., tN})` returns.
94///
95/// Note that for this to work with RDataFrame the type of all columns that the callable is applied to must be exactly T.
96/// Example usage together with RDataFrame ("varX" columns must all be `float` variables):
97/// \code
98/// bool myVecFunc(std::vector<float> args);
99/// df.Filter(PassAsVec<3, float>(myVecFunc), {"var1", "var2", "var3"});
100/// \endcode
101// clang-format on
102template <std::size_t N, typename T, typename F>
104{
106}
107
// clang-format off
/// Create a graphviz representation of the dataframe computation graph, return it as a string.
/// \param[in] node any node of the graph. Called on the head (first) node, it prints the entire graph. Otherwise, only the branch the node belongs to.
/// \return The graph description as a string.
///
/// The output can be displayed with a command akin to `dot -Tpng output.dot > output.png && open output.png`.
///
/// Note that "hanging" Defines, i.e. Defines without downstream nodes, will not be displayed by SaveGraph as they are
/// effectively optimized away from the computation graph.
///
/// Note that SaveGraph is not thread-safe and must not be called concurrently from different threads.
// clang-format on
template <typename NodeType>
std::string SaveGraph(NodeType node)
{
   // NOTE(review): the declaration of `helper` (original listing line 122) is missing from this copy of the
   // file; restore it from the upstream header before compiling.
   return helper(node);
}
125
// clang-format off
/// Create a graphviz representation of the dataframe computation graph, write it to the specified file.
/// \param[in] node any node of the graph. Called on the head (first) node, it prints the entire graph. Otherwise, only the branch the node belongs to.
/// \param[in] outputFile file where to save the representation.
/// \throws std::runtime_error if `outputFile` cannot be opened for writing.
///
/// The output can be displayed with a command akin to `dot -Tpng output.dot > output.png && open output.png`.
///
/// Note that "hanging" Defines, i.e. Defines without downstream nodes, will not be displayed by SaveGraph as they are
/// effectively optimized away from the computation graph.
///
/// Note that SaveGraph is not thread-safe and must not be called concurrently from different threads.
// clang-format on
template <typename NodeType>
void SaveGraph(NodeType node, const std::string &outputFile)
{
   // NOTE(review): the declaration of `helper` (original listing line 141) is missing from this copy of the
   // file; restore it from the upstream header before compiling.
   const std::string dotGraph = helper(node);

   std::ofstream out(outputFile);
   if (!out.is_open()) {
      // The stream is an output stream: report a failure to open for writing, not for reading.
      throw std::runtime_error("Could not open output file \"" + outputFile + "\" for writing");
   }

   out << dotGraph;
   out.close();
}
152
153// clang-format off
154/// Cast a RDataFrame node to the common type ROOT::RDF::RNode
155/// \param[in] node Any node of a RDataFrame graph
156// clang-format on
157template <typename NodeType>
158RNode AsRNode(NodeType node)
159{
160 return node;
161}
162
163// clang-format off
164/// Trigger the event loop of multiple RDataFrames concurrently
165/// \param[in] handles A vector of RResultHandles
166///
167/// This function triggers the event loop of all computation graphs which relate to the
168/// given RResultHandles. The advantage compared to running the event loop implicitly by accessing the
169/// RResultPtr is that the event loops will run concurrently. Therefore, the overall
170/// computation of all results is generally more efficient.
171/// It should be noted that user-defined operations (e.g., Filters and Defines) of the different RDataFrame graphs are assumed to be safe to call concurrently.
172///
173/// ~~~{.cpp}
174/// ROOT::RDataFrame df1("tree1", "file1.root");
175/// auto r1 = df1.Histo1D("var1");
176///
177/// ROOT::RDataFrame df2("tree2", "file2.root");
178/// auto r2 = df2.Sum("var2");
179///
180/// // RResultPtr -> RResultHandle conversion is automatic
181/// ROOT::RDF::RunGraphs({r1, r2});
182/// ~~~
183// clang-format on
184void RunGraphs(std::vector<RResultHandle> handles);
185
186namespace Experimental {
187
188/// \brief Produce all required systematic variations for the given result.
189/// \param[in] resPtr The result for which variations should be produced.
190/// \return A \ref ROOT::RDF::Experimental::RResultMap "RResultMap" object with full variation names as strings
191/// (e.g. "pt:down") and the corresponding varied results as values.
192///
193/// A given input RResultPtr<T> produces a corresponding RResultMap<T> with a "nominal"
194/// key that will return a value identical to the one contained in the original RResultPtr.
195/// Other keys correspond to the varied values of this result, one for each variation
196/// that the result depends on.
197/// VariationsFor does not trigger the event loop. The event loop is only triggered
198/// upon first access to a valid key, similarly to what happens with RResultPtr.
199///
200/// If the result does not depend, directly or indirectly, from any registered systematic variation, the
201/// returned RResultMap will contain only the "nominal" key.
202///
203/// See RDataFrame's \ref ROOT::RDF::RInterface::Vary() "Vary" method for more information and example usages.
204///
205/// \note Currently, producing variations for the results of \ref ROOT::RDF::RInterface::Display() "Display",
206/// \ref ROOT::RDF::RInterface::Report() "Report" and \ref ROOT::RDF::RInterface::Snapshot() "Snapshot"
207/// actions is not supported.
208//
209// An overview of how systematic variations work internally. Given N variations (including the nominal):
210//
211// RResultMap owns RVariedAction
212// N results N action helpers
213// N previous filters
214// N*#input_cols column readers
215//
216// ...and each RFilter and RDefine knows for what universe it needs to construct column readers ("nominal" by default).
217template <typename T>
219{
220 R__ASSERT(resPtr != nullptr && "Calling VariationsFor on an empty RResultPtr");
221
222 // populate parts of the computation graph for which we only have "empty shells", e.g. RJittedActions and
223 // RJittedFilters
224 resPtr.fLoopManager->Jit();
225
226 std::unique_ptr<RDFInternal::RActionBase> variedAction;
227 std::vector<std::shared_ptr<T>> variedResults;
228
229 std::shared_ptr<RDFInternal::RActionBase> nominalAction = resPtr.fActionPtr;
230 std::vector<std::string> variations = nominalAction->GetVariations();
231 const auto nVariations = variations.size();
232
233 if (nVariations > 0) {
234 // clone the result once for each variation
235 variedResults.reserve(nVariations);
236 for (auto i = 0u; i < nVariations; ++i)
237 // implicitly assuming that T is copiable: this should be the case
238 // for all result types in use, as they are copied for each slot
239 variedResults.emplace_back(new T{*resPtr.fObjPtr});
240
241 std::vector<void *> typeErasedResults;
242 typeErasedResults.reserve(variedResults.size());
243 for (auto &res : variedResults)
244 typeErasedResults.emplace_back(&res);
245
246 // Create the RVariedAction and inject it in the computation graph.
247 // This recursively creates all the required varied column readers and upstream nodes of the computation graph.
248 variedAction = nominalAction->MakeVariedAction(std::move(typeErasedResults));
249 }
250
251 return RDFInternal::MakeResultMap<T>(resPtr.fObjPtr, std::move(variedResults), std::move(variations),
252 *resPtr.fLoopManager, std::move(nominalAction), std::move(variedAction));
253}
254
255} // namespace Experimental
256} // namespace RDF
257} // namespace ROOT
258#endif
#define f(i)
Definition RSha256.hxx:104
#define R__ASSERT(e)
Definition TError.h:118
#define N
TRObject operator()(const T1 &t1) const
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
The public interface to the RDataFrame federation of classes.
Smart pointer for the return type of actions.
RDFDetail::RLoopManager * fLoopManager
Non-owning pointer to the RLoopManager at the root of this computation graph.
std::shared_ptr< RDFInternal::RActionBase > fActionPtr
Owning pointer to the action that will produce this result.
SPT_t fObjPtr
Shared pointer encapsulating the wrapped result.
#define F(x, y, z)
std::function< bool(ArgTypes...)> NotHelper(ROOT::TypeTraits::TypeList< ArgTypes... >, F &&f)
auto PassAsVec(F &&f) -> PassAsVecHelper< std::make_index_sequence< N >, T, F >
RResultMap< T > VariationsFor(RResultPtr< T > resPtr)
Produce all required systematic variations for the given result.
void RunGraphs(std::vector< RResultHandle > handles)
Trigger the event loop of multiple RDataFrames concurrently.
auto Not(F &&f) -> decltype(RDFInternal::NotHelper(Args(), std::forward< F >(f)))
Given a callable with signature bool(T1, T2, ...) return a callable with same signature that returns ...
std::string SaveGraph(NodeType node)
Create a graphviz representation of the dataframe computation graph, return it as a string.
RNode AsRNode(NodeType node)
Cast a RDataFrame node to the common type ROOT::RDF::RNode.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Lightweight storage for a collection of types.