Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RResultPtr.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RRESULTPTR
12#define ROOT_RRESULTPTR
13
15#include "RtypesCore.h"
17#include "ROOT/TypeTraits.hxx"
18#include "TError.h" // Warning
19
20#include <memory>
21#include <functional>
22#include <type_traits> // std::is_constructible
23
24namespace ROOT {
25namespace RDF {
26template <typename T>
27class RResultPtr;
28
29namespace Experimental {
30template <typename T>
31class RResultMap;
32
33template <typename T>
34RResultMap<T> VariationsFor(RResultPtr<T> resPtr);
35} // namespace Experimental
36
37template <typename Proxied, typename DataSource>
38class RInterface;
39} // namespace RDF
40
41namespace Internal {
42namespace RDF {
44/**
45 * \brief Creates a new RResultPtr with a cloned action.
46 *
47 * \tparam T The type of the result held by the RResultPtr.
48 * \param inptr The pointer.
49 * \return A new pointer with a cloned action.
50 */
51template <typename T>
53{
54 // We call the copy constructor, to copy also the metadata of certain
55 // result types, e.g. for a TH1D we have to create a new histogram with
56 // the same binning and axis limits.
57 std::shared_ptr<T> copiedResult{new T{*inptr.fObjPtr}};
58 return ROOT::RDF::RResultPtr<T>(copiedResult, inptr.fLoopManager,
59 inptr.fActionPtr->CloneAction(reinterpret_cast<void *>(&copiedResult)));
60}
61
63/**
64 * \brief Creates a new RResultPtr with a cloned Snapshot action.
65 *
66 * \param inptr The pointer.
67 * \param outputFileName A new name for the output file of the cloned action.
68 * \return A new pointer with a cloned action.
69 *
70 * This overload is needed since cloning a Snapshot node usually also involves
71 * changing the name of the output file, otherwise the cloned Snapshot would
72 * overwrite the same file.
73 */
74SnapshotPtr_t CloneResultAndAction(const SnapshotPtr_t &inptr, const std::string &outputFileName);
75} // namespace RDF
76} // namespace Internal
77
78namespace Detail {
79namespace RDF {
81// Fwd decl for RResultPtr
82template <typename T>
83RResultPtr<T> MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &df,
84 std::shared_ptr<ROOT::Internal::RDF::RActionBase> actionPtr);
85
86// Fwd decl for GetMergeableValue
87template <typename T>
88class RMergeableValue;
89
90template <typename T>
91std::unique_ptr<RMergeableValue<T>> GetMergeableValue(RResultPtr<T> &rptr);
92} // namespace RDF
93} // namespace Detail
94namespace RDF {
97namespace TTraits = ROOT::TypeTraits;
98
99/// Smart pointer for the return type of actions
100/**
101\class ROOT::RDF::RResultPtr
102\ingroup dataframe
103\brief A wrapper around the result of RDataFrame actions able to trigger calculations lazily.
104\tparam T Type of the action result
105
106A smart pointer which allows to access the result of a RDataFrame action. The
107methods of the encapsulated object can be accessed via the arrow operator.
108Upon invocation of the arrow operator or dereferencing (`operator*`), the
109loop on the events and calculations of all scheduled actions are executed
110if needed.
111It is possible to iterate on the result proxy if the proxied object is a collection.
112~~~{.cpp}
113for (auto& myItem : myResultProxy) { ... };
114~~~
115If iteration is not supported by the type of the proxied object, a compilation error is thrown.
116
117*/
118template <typename T>
120 // private using declarations
121 using SPT_t = std::shared_ptr<T>;
122
123 // friend declarations
124 template <typename T1>
125 friend class RResultPtr;
126
127 template <typename T1>
129 std::shared_ptr<RDFInternal::RActionBase>);
130
131 template <typename T1>
133
134 template <class T1, class T2>
135 friend bool operator==(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs);
136 template <class T1, class T2>
137 friend bool operator!=(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs);
138 template <class T1>
139 friend bool operator==(const RResultPtr<T1> &lhs, std::nullptr_t rhs);
140 template <class T1>
141 friend bool operator==(std::nullptr_t lhs, const RResultPtr<T1> &rhs);
142 template <class T1>
143 friend bool operator!=(const RResultPtr<T1> &lhs, std::nullptr_t rhs);
144 template <class T1>
145 friend bool operator!=(std::nullptr_t lhs, const RResultPtr<T1> &rhs);
146 friend std::unique_ptr<RDFDetail::RMergeableValue<T>> RDFDetail::GetMergeableValue<T>(RResultPtr<T> &rptr);
147
149
150 friend class RResultHandle;
151
152 friend RResultPtr<T> ROOT::Internal::RDF::CloneResultAndAction<T>(const RResultPtr<T> &inptr);
155 const std::string &outputFileName);
156 /// \cond HIDDEN_SYMBOLS
157 template <typename V, bool hasBeginEnd = TTraits::HasBeginAndEnd<V>::value>
 // Primary template: used when the second parameter is not `true` (the `true` case has its own
 // specialization below). Iterator_t is `void`, and GetBegin/GetEnd exist only to produce a
 // readable compile-time error message.
158 struct RIterationHelper {
159 using Iterator_t = void;
 // `sizeof(V) == 0` is false for any complete type, but because it depends on V the assertion
 // is only evaluated when the member function is actually instantiated.
160 void GetBegin(const V &) { static_assert(sizeof(V) == 0, "It does not make sense to ask begin for this class."); }
161 void GetEnd(const V &) { static_assert(sizeof(V) == 0, "It does not make sense to ask end for this class."); }
162 };
163
164 template <typename V>
 // Specialization selected when the second template argument is `true`, i.e. when
 // TTraits::HasBeginAndEnd<V>::value holds for the default argument of the primary template.
165 struct RIterationHelper<V, true> {
 // Iterator type is whatever std::begin yields for an expression of type V.
166 using Iterator_t = decltype(std::begin(std::declval<V>()));
 // Both helpers take the container by const reference and forward to std::begin / std::end.
167 static Iterator_t GetBegin(const V &v) { return std::begin(v); };
168 static Iterator_t GetEnd(const V &v) { return std::end(v); };
169 };
170 /// \endcond
171
172 /// Non-owning pointer to the RLoopManager at the root of this computation graph.
173 /// The RLoopManager is guaranteed to be always in scope if fLoopManager is not a nullptr.
175 SPT_t fObjPtr; ///< Shared pointer encapsulating the wrapped result
176 /// Owning pointer to the action that will produce this result.
177 /// Ownership is shared with other copies of this ResultPtr.
178 std::shared_ptr<RDFInternal::RActionBase> fActionPtr;
179
180 /// Triggers the event loop in the RLoopManager
181 void TriggerRun();
182
183 /// Get the pointer to the encapsulated result.
184 /// Ownership is not transferred to the caller.
185 /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
186 T *Get()
187 {
 // Run the event loop only when there is an action attached and it has not run yet;
 // a null fActionPtr simply skips the run.
188 if (fActionPtr != nullptr && !fActionPtr->HasRun())
189 TriggerRun();
 // Raw, non-owning pointer; this is nullptr when fObjPtr is empty (no null check is done here).
190 return fObjPtr.get();
191 }
192
194 {
195 if (fObjPtr == nullptr)
196 throw std::runtime_error("Trying to access the contents of a null RResultPtr.");
197 }
198
199 RResultPtr(std::shared_ptr<T> objPtr, RDFDetail::RLoopManager *lm,
200 std::shared_ptr<RDFInternal::RActionBase> actionPtr)
 // Member-wise construction: the loop manager pointer is stored as given, while the two
 // shared pointers are taken by value and moved into the members.
201 : fLoopManager(lm), fObjPtr(std::move(objPtr)), fActionPtr(std::move(actionPtr))
202 {
203 }
204
205public:
206 using Value_t = T; ///< Convenience alias to simplify access to proxied type
207 static constexpr ULong64_t kOnce = 0ull; ///< Convenience definition to express a callback must be executed once
208
209 RResultPtr() = default;
210 RResultPtr(const RResultPtr &) = default;
211 RResultPtr(RResultPtr &&) = default;
212 RResultPtr &operator=(const RResultPtr &) = default;
214 explicit operator bool() const { return bool(fObjPtr); }
215
216 /// Convert a RResultPtr<T2> to a RResultPtr<T>.
217 ///
218 /// Useful e.g. to store a number of RResultPtr<TH1D> and RResultPtr<TH2D> in a std::vector<RResultPtr<TH1>>.
219 /// The requirements on T2 and T are the same as for conversion between std::shared_ptr<T2> and std::shared_ptr<T>.
220 template <typename T2,
221 std::enable_if_t<std::is_constructible<std::shared_ptr<T>, std::shared_ptr<T2>>::value, int> = 0>
223 {
224 }
225
226 /// Get a const reference to the encapsulated object.
227 /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
228 const T &GetValue()
229 {
 // Reject an empty result pointer before dereferencing (ThrowIfNull throws std::runtime_error
 // when fObjPtr is null).
230 ThrowIfNull();
 // Get() triggers the event loop if needed and returns the raw pointer to the result.
231 return *Get();
232 }
233
234 /// Get the pointer to the encapsulated object.
235 /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
236 T *GetPtr() { return Get(); }
 // Unlike GetValue(), no ThrowIfNull() check is performed here: the returned pointer is
 // whatever Get() yields, which is nullptr for an empty RResultPtr.
237
238 /// Get a reference to the encapsulated object.
239 /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
241 {
242 ThrowIfNull();
243 return *Get();
244 }
245
246 /// Get a pointer to the encapsulated object.
247 /// Ownership is not transferred to the caller.
248 /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
250 {
251 ThrowIfNull();
252 return Get();
253 }
254
255 /// Return an iterator to the beginning of the contained object if this makes
256 /// sense, throw a compilation error otherwise
257 typename RIterationHelper<T>::Iterator_t begin()
258 {
 // Throws if there is no result object to iterate over.
259 ThrowIfNull();
 // NOTE(review): fActionPtr is dereferenced without the `!= nullptr` guard that Get() uses,
 // and ThrowIfNull() only checks fObjPtr -- confirm fActionPtr can never be null here.
260 if (!fActionPtr->HasRun())
261 TriggerRun();
 // For non-iterable T this instantiates the primary RIterationHelper and fails to compile.
262 return RIterationHelper<T>::GetBegin(*fObjPtr);
263 }
264
265 /// Return an iterator to the end of the contained object if this makes
266 /// sense, throw a compilation error otherwise
267 typename RIterationHelper<T>::Iterator_t end()
268 {
 // Throws if there is no result object to iterate over.
269 ThrowIfNull();
 // NOTE(review): fActionPtr is dereferenced without the `!= nullptr` guard that Get() uses,
 // and ThrowIfNull() only checks fObjPtr -- confirm fActionPtr can never be null here.
270 if (!fActionPtr->HasRun())
271 TriggerRun();
 // For non-iterable T this instantiates the primary RIterationHelper and fails to compile.
272 return RIterationHelper<T>::GetEnd(*fObjPtr);
273 }
274
275 // clang-format off
276 /// Register a callback that RDataFrame will execute "everyNEvents" on a partial result.
277 ///
278 /// \param[in] everyNEvents Frequency at which the callback will be called, as a number of events processed
279 /// \param[in] callback a callable with signature `void(Value_t&)` where Value_t is the type of the value contained in this RResultPtr
280 /// \return this RResultPtr, to allow chaining of OnPartialResult with other calls
281 ///
282 /// The callback must be a callable (lambda, function, functor class...) that takes a reference to the result type as
283 /// argument and returns nothing. RDataFrame will invoke registered callbacks passing partial action results as
284 /// arguments to them (e.g. a histogram filled with a part of the selected events, a counter incremented only up to a
285 /// certain point, a mean over a subset of the events and so forth).
286 ///
287 /// Callbacks can be used e.g. to inspect partial results of the analysis while the event loop is running. For
288 /// example one can draw an up-to-date version of a result histogram every 100 entries like this:
289 /// \code{.cpp}
290 /// auto h = tdf.Histo1D("x");
291 /// TCanvas c("c","x hist");
292 /// h.OnPartialResult(100, [&c](TH1D &h_) { c.cd(); h_.Draw(); c.Update(); });
293 /// h->Draw(); // event loop runs here, this `Draw` is executed after the event loop is finished
294 /// \endcode
295 ///
296 /// A value of 0 for everyNEvents indicates the callback must be executed only once, before running the event loop.
297 /// A convenience definition `kOnce` is provided to make this fact more expressive in user code (see snippet below).
298 /// Multiple callbacks can be registered with the same RResultPtr (i.e. results of RDataFrame actions) and will
299 /// be executed sequentially. Callbacks are executed in the order they were registered.
300 /// The type of the value contained in a RResultPtr is also available as RResultPtr<T>::Value_t, e.g.
301 /// \code{.cpp}
302 /// auto h = tdf.Histo1D("x");
303 /// // h.kOnce is 0
304 /// // decltype(h)::Value_t is TH1D
305 /// \endcode
306 ///
307 /// When implicit multi-threading is enabled, the callback:
308 /// - will never be executed by multiple threads concurrently: it needs not be thread-safe. For example the snippet
309 /// above that draws the partial histogram on a canvas works seamlessly in multi-thread event loops.
310 /// - will always be executed "everyNEvents": partial results will "contain" that number of events more from
311 /// one call to the next
312 /// - might be executed by a different worker thread at different times: the value of `std::this_thread::get_id()`
313 /// might change between calls
314 ///
315 /// To register a callback that is called by _each_ worker thread (concurrently) every N events one can use
316 /// OnPartialResultSlot().
317 // clang-format on
318 RResultPtr<T> &OnPartialResult(ULong64_t everyNEvents, std::function<void(T &)> callback)
319 {
 // Throws std::runtime_error if this RResultPtr holds no result object.
320 ThrowIfNull();
 // NOTE(review): fLoopManager is dereferenced unchecked; ThrowIfNull() only inspects fObjPtr.
321 const auto nSlots = fLoopManager->GetNSlots();
 // Copy the shared_ptr so the lambda owns its own reference to the action instead of capturing `this`.
322 auto actionPtr = fActionPtr;
323 auto c = [nSlots, actionPtr, callback](unsigned int slot) {
 // The wrapper is invoked with a slot number; only the slot with index nSlots - 1 goes on to
 // call the user callback, every other slot returns immediately.
324 if (slot != nSlots - 1)
325 return;
 // PartialUpdate's return value is cast to Value_t* and handed to the callback by reference.
326 auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot));
327 callback(*partialResult);
328 };
329 fLoopManager->RegisterCallback(everyNEvents, std::move(c));
 // Returning *this allows call chaining.
330 return *this;
331 }
332
333 // clang-format off
334 /// Register a callback that RDataFrame will execute in each worker thread concurrently on that thread's partial result.
335 ///
336 /// \param[in] everyNEvents Frequency at which the callback will be called by each thread, as a number of events processed
337 /// \param[in] callback A callable with signature `void(unsigned int, Value_t&)` where Value_t is the type of the value contained in this RResultPtr
338 /// \return this RResultPtr, to allow chaining of OnPartialResultSlot with other calls
339 ///
340 /// See `OnPartialResult` for a generic explanation of the callback mechanism.
341 /// Compared to `OnPartialResult`, this method differs in the following ways:
342 /// - all worker threads invoke the callback once every specified number of events. The event count is per-thread,
343 /// and callback invocation might happen concurrently (i.e. the callback must be thread-safe)
344 /// - the callable must take an extra `unsigned int` parameter corresponding to a multi-thread "processing slot":
345 /// this is a "helper value" to simplify writing thread-safe callbacks: different worker threads might invoke the
346 /// callback concurrently but always with different `slot` numbers.
347 /// - a value of 0 for everyNEvents indicates the callback must be executed once _per slot_.
348 ///
349 /// For example, the following snippet prints out a thread-safe progress bar of the events processed by RDataFrame
350 /// \code
351 /// auto c = tdf.Count(); // any action would do, but `Count` is the most lightweight
352 /// std::string progress;
353 /// std::mutex bar_mutex;
354 /// c.OnPartialResultSlot(nEvents / 100, [&progress, &bar_mutex](unsigned int, ULong64_t &) {
355 /// std::lock_guard<std::mutex> lg(bar_mutex);
356 /// progress.push_back('#');
357 /// std::cout << "\r[" << std::left << std::setw(100) << progress << ']' << std::flush;
358 /// });
359 /// std::cout << "Analysis running..." << std::endl;
360 /// *c; // trigger the event loop by accessing an action's result
361 /// std::cout << "\nDone!" << std::endl;
362 /// \endcode
363 // clang-format on
364 RResultPtr<T> &OnPartialResultSlot(ULong64_t everyNEvents, std::function<void(unsigned int, T &)> callback)
365 {
 // Throws std::runtime_error if this RResultPtr holds no result object.
366 ThrowIfNull();
 // Copy the shared_ptr so the lambda owns its own reference to the action instead of capturing `this`.
367 auto actionPtr = fActionPtr;
 // No slot filtering here: every invocation forwards both the slot number and that slot's
 // partial result (PartialUpdate's return value cast to Value_t*) to the user callback.
368 auto c = [actionPtr, callback](unsigned int slot) {
369 auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot));
370 callback(slot, *partialResult);
371 };
 // NOTE(review): fLoopManager is dereferenced unchecked; ThrowIfNull() only inspects fObjPtr.
372 fLoopManager->RegisterCallback(everyNEvents, std::move(c));
 // Returning *this allows call chaining.
373 return *this;
374 }
375
376 // clang-format off
377 /// Check whether the result has already been computed
378 ///
379 /// ~~~{.cpp}
380 /// auto res = df.Count();
381 /// res.IsReady(); // false, access will trigger event loop
382 /// std::cout << *res << std::endl; // triggers event loop
383 /// res.IsReady(); // true
384 /// ~~~
385 // clang-format on
386 bool IsReady() const
387 {
 // Without an associated action there is nothing that could have run: report "not ready".
388 if (fActionPtr == nullptr)
389 return false;
 // Otherwise readiness is exactly the action's HasRun() state; this never triggers the event loop.
390 return fActionPtr->HasRun();
391 }
392};
393
394template <typename T>
396{
397 fLoopManager->Run();
398}
399
400template <class T1, class T2>
/// Two RResultPtr compare equal when their fObjPtr shared pointers compare equal.
/// fActionPtr and fLoopManager do not take part in the comparison.
401bool operator==(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs)
402{
403 return lhs.fObjPtr == rhs.fObjPtr;
404}
405
406template <class T1, class T2>
/// Negation of the RResultPtr/RResultPtr equality: compares the fObjPtr members only.
407bool operator!=(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs)
408{
409 return lhs.fObjPtr != rhs.fObjPtr;
410}
411
412template <class T1>
/// True when the RResultPtr holds no result object (fObjPtr compares equal to nullptr).
413bool operator==(const RResultPtr<T1> &lhs, std::nullptr_t rhs)
414{
415 return lhs.fObjPtr == rhs;
416}
417
418template <class T1>
/// Same as above with the operands swapped.
419bool operator==(std::nullptr_t lhs, const RResultPtr<T1> &rhs)
420{
421 return lhs == rhs.fObjPtr;
422}
423
424template <class T1>
/// True when the RResultPtr holds a result object (fObjPtr differs from nullptr).
425bool operator!=(const RResultPtr<T1> &lhs, std::nullptr_t rhs)
426{
427 return lhs.fObjPtr != rhs;
428}
429
430template <class T1>
/// Same as above with the operands swapped.
431bool operator!=(std::nullptr_t lhs, const RResultPtr<T1> &rhs)
432{
433 return lhs != rhs.fObjPtr;
434}
435
436} // namespace RDF
437
438namespace Detail {
439namespace RDF {
440/// Create a RResultPtr and set its pointer to the corresponding RAction
441/// This overload is invoked by non-jitted actions, as they have access to RAction before constructing RResultPtr.
442template <typename T>
/// \param[in] r Shared pointer to the result object; it is copied into the new RResultPtr.
/// \param[in] lm Loop manager; its address is stored in the new RResultPtr.
/// \param[in] actionPtr Shared pointer to the action; it is moved into the new RResultPtr.
443RResultPtr<T>
444MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &lm, std::shared_ptr<RDFInternal::RActionBase> actionPtr)
445{
446 return RResultPtr<T>(r, &lm, std::move(actionPtr));
447}
448
449////////////////////////////////////////////////////////////////////////////////
450/// \brief Retrieve a mergeable value from an RDataFrame action.
451/// \param[in] rptr lvalue reference of an RResultPtr object.
452/// \returns An RMergeableValue holding the result of the action, wrapped in an
453/// `std::unique_ptr`.
454///
455/// This function triggers the execution of the RDataFrame computation graph.
456/// Then retrieves an RMergeableValue object created with the result wrapped by
457/// the RResultPtr argument. The user obtains ownership of the mergeable, which
458/// in turn holds a copy of the result of the action. The RResultPtr is not
459/// destroyed in the process and will still retain (shared) ownership of the
460/// original result.
461///
462/// Example usage:
463/// ~~~{.cpp}
464/// using namespace ROOT::Detail::RDF;
465/// ROOT::RDataFrame d("myTree", "file_*.root");
466/// auto h = d.Histo1D("Branch_A");
467/// auto mergeablehisto = GetMergeableValue(h);
468/// ~~~
469template <typename T>
470std::unique_ptr<RMergeableValue<T>> GetMergeableValue(RResultPtr<T> &rptr)
471{
 // Throws std::runtime_error if rptr holds no result object.
472 rptr.ThrowIfNull();
 // NOTE(review): fActionPtr is dereferenced without a null check (ThrowIfNull() only inspects
 // fObjPtr) -- confirm it can never be null for a non-empty RResultPtr.
473 if (!rptr.fActionPtr->HasRun())
474 rptr.TriggerRun(); // Prevents from using `const` specifier in parameter
 // The action's GetMergeableValue() result is released and re-wrapped in a unique_ptr of the
 // typed RMergeableValue<T>. The static_cast is unchecked: presumably the action always
 // produces an RMergeableValue<T> for this T -- TODO confirm.
475 return std::unique_ptr<RMergeableValue<T>>{
476 static_cast<RMergeableValue<T> *>(rptr.fActionPtr->GetMergeableValue().release())};
477}
478} // namespace RDF
479} // namespace Detail
480} // namespace ROOT
481
482#endif // ROOT_RRESULTPTR
#define c(i)
Definition RSha256.hxx:101
unsigned long long ULong64_t
Definition RtypesCore.h:81
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
The head node of a RDF computation graph.
void Run(bool jit=true)
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void RegisterCallback(ULong64_t everyNEvents, std::function< void(unsigned int)> &&f)
A result of an RDataFrame execution, that knows how to merge with other results of the same type.
Helper class that provides the operation graph nodes.
A type-erased version of RResultPtr and RResultMap.
Smart pointer for the return type of actions.
bool IsReady() const
Check whether the result has already been computed.
void TriggerRun()
Triggers the event loop in the RLoopManager.
RResultPtr(RResultPtr &&)=default
T * GetPtr()
Get the pointer to the encapsulated object.
RResultPtr< T > & OnPartialResult(ULong64_t everyNEvents, std::function< void(T &)> callback)
Register a callback that RDataFrame will execute "everyNEvents" on a partial result.
RDFDetail::RLoopManager * fLoopManager
Non-owning pointer to the RLoopManager at the root of this computation graph.
friend bool operator!=(const RResultPtr< T1 > &lhs, const RResultPtr< T2 > &rhs)
RIterationHelper< T >::Iterator_t begin()
Return an iterator to the beginning of the contained object if this makes sense, throw a compilation ...
T Value_t
Convenience alias to simplify access to proxied type.
T * Get()
Get the pointer to the encapsulated result.
const T & GetValue()
Get a const reference to the encapsulated object.
RResultPtr(const RResultPtr &)=default
T & operator*()
Get a pointer to the encapsulated object.
RResultPtr & operator=(const RResultPtr &)=default
RResultPtr< T > & OnPartialResultSlot(ULong64_t everyNEvents, std::function< void(unsigned int, T &)> callback)
Register a callback that RDataFrame will execute in each worker thread concurrently on that thread's ...
RResultPtr & operator=(RResultPtr &&)=default
friend bool operator==(const RResultPtr< T1 > &lhs, const RResultPtr< T2 > &rhs)
RResultPtr(const RResultPtr< T2 > &r)
Convert a RResultPtr<T2> to a RResultPtr<T>.
std::shared_ptr< RDFInternal::RActionBase > fActionPtr
Owning pointer to the action that will produce this result.
std::shared_ptr< T > SPT_t
T * operator->()
Get a pointer to the encapsulated object.
SPT_t fObjPtr
Shared pointer encapsulating the wrapped result.
static constexpr ULong64_t kOnce
Convenience definition to express a callback must be executed once.
RResultPtr(std::shared_ptr< T > objPtr, RDFDetail::RLoopManager *lm, std::shared_ptr< RDFInternal::RActionBase > actionPtr)
RIterationHelper< T >::Iterator_t end()
Return an iterator to the end of the contained object if this makes sense, throw a compilation error ...
#define T2
Definition md5.inl:147
std::unique_ptr< RMergeableVariations< T > > GetMergeableValue(ROOT::RDF::Experimental::RResultMap< T > &rmap)
Retrieve mergeable values after calling ROOT::RDF::VariationsFor .
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create a RResultPtr and set its pointer to the corresponding RAction This overload is invoked by non-...
ROOT::RDF::Experimental::RResultMap< T > CloneResultAndAction(const ROOT::RDF::Experimental::RResultMap< T > &inmap)
Clones an RResultMap and its corresponding RVariedAction.
RResultMap< T > VariationsFor(RResultPtr< T > resPtr)
Produce all required systematic variations for the given result.
bool operator!=(const RResultPtr< T1 > &lhs, const RResultPtr< T2 > &rhs)
bool operator==(const RResultPtr< T1 > &lhs, const RResultPtr< T2 > &rhs)
ROOT type_traits extensions.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.