Logo ROOT  
Reference Guide
RDFHelpers.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11// This header contains helper free functions that slim down RDataFrame's programming model
12
13#ifndef ROOT_RDF_HELPERS
14#define ROOT_RDF_HELPERS
15
16#include <ROOT/RDataFrame.hxx>
21#include <ROOT/TypeTraits.hxx>
22
23#include <algorithm> // std::transform
24#include <fstream>
25#include <functional>
26#include <memory>
27#include <type_traits>
28#include <utility> // std::index_sequence
29#include <vector>
30
31namespace ROOT {
32namespace Internal {
33namespace RDF {
34template <typename... ArgTypes, typename F>
36{
37 return std::function<bool(ArgTypes...)>([=](ArgTypes... args) mutable { return !f(args...); });
38}
39
40template <typename... ArgTypes, typename Ret, typename... Args>
42{
43 return std::function<bool(ArgTypes...)>([=](ArgTypes... args) mutable { return !f(args...); });
44}
45
/// Primary template: declared only. The usable implementation is the partial specialization below,
/// which requires the first template argument to be a std::index_sequence.
template <typename I, typename T, typename F>
class PassAsVecHelper;

/// Callable wrapper that takes sizeof...(N) arguments of type T and passes them to the wrapped callable
/// as one braced list `{args...}`, returning whatever the wrapped callable returns.
template <std::size_t... N, typename T, typename F>
class PassAsVecHelper<std::index_sequence<N...>, T, F> {
   // Maps every index to T, so that expanding `AlwaysT<N>...` yields one T parameter per index in the pack.
   template <std::size_t Idx>
   using AlwaysT = T;
   std::decay_t<F> fFunc; // decayed copy of the wrapped callable, owned by this helper

public:
   PassAsVecHelper(F &&f) : fFunc(std::forward<F>(f)) {}
   auto operator()(AlwaysT<N>... args) -> decltype(fFunc({args...})) { return fFunc({args...}); }
};
59
60template <std::size_t N, typename T, typename F>
62{
63 return PassAsVecHelper<std::make_index_sequence<N>, T, F>(std::forward<F>(f));
64}
65
66} // namespace RDF
67} // namespace Internal
68
69namespace RDF {
71
72
73// clag-format off
74/// Given a callable with signature bool(T1, T2, ...) return a callable with same signature that returns the negated result
75///
76/// The callable must have one single non-template definition of operator(). This is a limitation with respect to
77/// std::not_fn, required for interoperability with RDataFrame.
78// clang-format on
79template <typename F,
80 typename Args = typename ROOT::TypeTraits::CallableTraits<std::decay_t<F>>::arg_types_nodecay,
81 typename Ret = typename ROOT::TypeTraits::CallableTraits<std::decay_t<F>>::ret_type>
82auto Not(F &&f) -> decltype(RDFInternal::NotHelper(Args(), std::forward<F>(f)))
83{
84 static_assert(std::is_same<Ret, bool>::value, "RDF::Not requires a callable that returns a bool.");
85 return RDFInternal::NotHelper(Args(), std::forward<F>(f));
86}
87
88// clang-format off
89/// PassAsVec is a callable generator that allows passing N variables of type T to a function as a single collection.
90///
91/// PassAsVec<N, T>(func) returns a callable that takes N arguments of type T, passes them down to function `func` as
92/// an initializer list `{t1, t2, t3,..., tN}` and returns whatever f({t1, t2, t3, ..., tN}) returns.
93///
94/// Note that for this to work with RDataFrame the type of all columns that the callable is applied to must be exactly T.
95/// Example usage together with RDataFrame ("varX" columns must all be `float` variables):
96/// \code
97/// bool myVecFunc(std::vector<float> args);
98/// df.Filter(PassAsVec<3, float>(myVecFunc), {"var1", "var2", "var3"});
99/// \endcode
100// clang-format on
101template <std::size_t N, typename T, typename F>
103{
105}
106
107// clang-format off
108/// Create a graphviz representation of the dataframe computation graph, return it as a string.
109/// \param[in] node any node of the graph. Called on the head (first) node, it prints the entire graph. Otherwise, only the branch the node belongs to.
110///
111/// The output can be displayed with a command akin to `dot -Tpng output.dot > output.png && open output.png`.
112///
113/// Note that "hanging" Defines, i.e. Defines without downstream nodes, will not be displayed by SaveGraph as they are
114/// effectively optimized away from the computation graph.
115///
116/// Note that SaveGraph is not thread-safe and must not be called concurrently from different threads.
117// clang-format on
118template <typename NodeType>
119std::string SaveGraph(NodeType node)
120{
122 return helper.RepresentGraph(node);
123}
124
125// clang-format off
126/// Create a graphviz representation of the dataframe computation graph, write it to the specified file.
127/// \param[in] node any node of the graph. Called on the head (first) node, it prints the entire graph. Otherwise, only the branch the node belongs to.
128/// \param[in] outputFile file where to save the representation.
129///
130/// The output can be displayed with a command akin to `dot -Tpng output.dot > output.png && open output.png`.
131///
132/// Note that "hanging" Defines, i.e. Defines without downstream nodes, will not be displayed by SaveGraph as they are
133/// effectively optimized away from the computation graph.
134///
135/// Note that SaveGraph is not thread-safe and must not be called concurrently from different threads.
136// clang-format on
137template <typename NodeType>
138void SaveGraph(NodeType node, const std::string &outputFile)
139{
141 std::string dotGraph = helper.RepresentGraph(node);
142
143 std::ofstream out(outputFile);
144 if (!out.is_open()) {
145 throw std::runtime_error("Could not open output file \"" + outputFile + "\"for reading");
146 }
147
148 out << dotGraph;
149 out.close();
150}
151
152// clang-format off
153/// Cast a RDataFrame node to the common type ROOT::RDF::RNode
154/// \param[in] node Any node of a RDataFrame graph
155// clang-format on
156template <typename NodeType>
157RNode AsRNode(NodeType node)
158{
159 return node;
160}
161
162// clang-format off
163/// Trigger the event loop of multiple RDataFrames concurrently
164/// \param[in] handles A vector of RResultHandles
165///
166/// This function triggers the event loop of all computation graphs which relate to the
167/// given RResultHandles. The advantage compared to running the event loop implicitly by accessing the
168/// RResultPtr is that the event loops will run concurrently. Therefore, the overall
169/// computation of all results is generally more efficient.
170/// It should be noted that user-defined operations (e.g., Filters and Defines) of the different RDataFrame graphs are assumed to be safe to call concurrently.
171///
172/// ~~~{.cpp}
173/// ROOT::RDataFrame df1("tree1", "file1.root");
174/// auto r1 = df1.Histo1D("var1");
175///
176/// ROOT::RDataFrame df2("tree2", "file2.root");
177/// auto r2 = df2.Sum("var2");
178///
179/// // RResultPtr -> RResultHandle conversion is automatic
180/// ROOT::RDF::RunGraphs({r1, r2});
181/// ~~~
182// clang-format on
183void RunGraphs(std::vector<RResultHandle> handles);
184
185namespace Experimental {
186
187/// \brief Produce all required systematic variations for the given result.
188/// \param[in] resPtr The result for which variations should be produced.
189/// \return A \ref ROOT::RDF::Experimental::RResultMap "RResultMap" object with full variation names as strings
190/// (e.g. "pt:down") and the corresponding varied results as values.
191///
192/// A given input RResultPtr<T> produces a corresponding RResultMap<T> with a "nominal"
193/// key that will return a value identical to the one contained in the original RResultPtr.
194/// Other keys correspond to the varied values of this result, one for each variation
195/// that the result depends on.
196/// VariationsFor does not trigger the event loop. The event loop is only triggered
197/// upon first access to a valid key, similarly to what happens with RResultPtr.
198///
199/// If the result does not depend, directly or indirectly, from any registered systematic variation, the
200/// returned RResultMap will contain only the "nominal" key.
201///
202/// See RDataFrame's \ref ROOT::RDF::RInterface::Vary() "Vary" method for more information and example usages.
203///
204/// \note Currently, producing variations for the results of \ref ROOT::RDF::RInterface::Display() "Display",
205/// \ref ROOT::RDF::RInterface::Report() "Report" and \ref ROOT::RDF::RInterface::Snapshot() "Snapshot"
206/// actions is not supported.
207//
208// TODO The current implementation duplicates work for the nominal value. In principle we could rewire things
209// so that the nominal value is calculated only once, either in the original action or in the varied action.
210//
211// An overview of how systematic variations work internally. Given N variations (including the nominal):
212//
213// RResultMap owns RVariedAction
214// N results N action helpers
215// N previous filters
216// N*#input_cols column readers
217//
218// ...and each RFilter and RDefine knows for what universe it needs to construct column readers ("nominal" by default).
219//
220//
221template <typename T>
223{
224 R__ASSERT(resPtr != nullptr && "Calling VariationsFor on an empty RResultPtr");
225
226 // populate parts of the computation graph for which we only have "empty shells", e.g. RJittedActions and
227 // RJittedFilters
228 resPtr.fLoopManager->Jit();
229
230 std::shared_ptr<RDFInternal::RActionBase> action = resPtr.fActionPtr;
231
232 // clone the result once for each variation
233 std::vector<std::string> variations = action->GetVariations();
234 variations.insert(variations.begin(), "nominal");
235 const auto nVariations = variations.size();
236 std::vector<std::shared_ptr<T>> results;
237 results.reserve(nVariations);
238 for (auto i = 0u; i < nVariations; ++i)
239 results.emplace_back(new T{*resPtr.fObjPtr}); // implicitly assuming that T is copiable: this should be the case
240 // for all result types in use, as they are copied for each slot
241
242 std::vector<void *> typeErasedResults;
243 typeErasedResults.reserve(results.size());
244 for (auto &res : results)
245 typeErasedResults.emplace_back(&res);
246
247 // create the RVariedAction and inject it in the computation graph
248 // this recursively creates all the required varied column readers and upstream nodes of the computation graph
249 std::unique_ptr<RDFInternal::RActionBase> variedAction{
250 resPtr.fActionPtr->MakeVariedAction(std::move(typeErasedResults))};
251 resPtr.fLoopManager->Book(variedAction.get());
252
253 return RDFInternal::MakeResultMap<T>(std::move(results), std::move(variations), *resPtr.fLoopManager,
254 std::move(variedAction));
255}
256
257} // namespace Experimental
258} // namespace RDF
259} // namespace ROOT
260#endif
#define f(i)
Definition: RSha256.hxx:104
#define R__ASSERT(e)
Definition: TError.h:118
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
TRObject operator()(const T1 &t1) const
void Book(RDFInternal::RActionBase *actionPtr)
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
std::string RepresentGraph(ROOT::RDataFrame &rDataFrame)
Starting from the root node, prints the entire graph.
The public interface to the RDataFrame federation of classes.
Definition: RInterface.hxx:104
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:103
RDFDetail::RLoopManager * fLoopManager
Non-owning pointer to the RLoopManager at the root of this computation graph.
Definition: RResultPtr.hxx:154
std::shared_ptr< RDFInternal::RActionBase > fActionPtr
Owning pointer to the action that will produce this result.
Definition: RResultPtr.hxx:158
#define F(x, y, z)
std::function< bool(ArgTypes...)> NotHelper(ROOT::TypeTraits::TypeList< ArgTypes... >, Ret(*f)(Args...))
Definition: RDFHelpers.hxx:41
std::function< bool(ArgTypes...)> NotHelper(ROOT::TypeTraits::TypeList< ArgTypes... >, F &&f)
Definition: RDFHelpers.hxx:35
auto PassAsVec(F &&f) -> PassAsVecHelper< std::make_index_sequence< N >, T, F >
Definition: RDFHelpers.hxx:61
double T(double x)
Definition: ChebyshevPol.h:34
RResultMap< T > VariationsFor(RResultPtr< T > resPtr)
Produce all required systematic variations for the given result.
Definition: RDFHelpers.hxx:222
auto PassAsVec(F &&f) -> RDFInternal::PassAsVecHelper< std::make_index_sequence< N >, T, F >
PassAsVec is a callable generator that allows passing N variables of type T to a function as a single...
Definition: RDFHelpers.hxx:102
void RunGraphs(std::vector< RResultHandle > handles)
Trigger the event loop of multiple RDataFrames concurrently.
Definition: RDFHelpers.cxx:23
auto Not(F &&f) -> decltype(RDFInternal::NotHelper(Args(), std::forward< F >(f)))
Given a callable with signature bool(T1, T2, ...) return a callable with same signature that returns ...
Definition: RDFHelpers.hxx:82
std::string SaveGraph(NodeType node)
Create a graphviz representation of the dataframe computation graph, return it as a string.
Definition: RDFHelpers.hxx:119
RNode AsRNode(NodeType node)
Cast a RDataFrame node to the common type ROOT::RDF::RNode.
Definition: RDFHelpers.hxx:157
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:167
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and functions) in forward direction of the DNN
Definition: NeuralNet.icc:546
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:25