Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RResultPtr.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RRESULTPTR
12#define ROOT_RRESULTPTR
13
15#include "RtypesCore.h"
17#include "ROOT/TypeTraits.hxx"
18#include "TError.h" // Warning
19
20#include <memory>
21#include <functional>
22#include <type_traits> // std::is_constructible
23
24namespace ROOT {
25namespace RDF {
26template <typename T>
27class RResultPtr;
28
29namespace Experimental {
30template <typename T>
31class RResultMap;
32
33template <typename T>
35} // namespace Experimental
36
37template <typename Proxied, typename DataSource>
38class RInterface;
39} // namespace RDF
40
41namespace Internal {
42namespace RDF {
44/**
45 * \brief Creates a new RResultPtr with a cloned action.
46 *
47 * \tparam T The type of the result held by the RResultPtr.
48 * \param inptr The pointer.
49 * \return A new pointer with a cloned action.
50 */
51template <typename T>
53{
54 // We call the copy constructor, to copy also the metadata of certain
55 // result types, e.g. for a TH1D we have to create a new histogram with
56 // the same binning and axis limits.
57 std::shared_ptr<T> copiedResult{new T{*inptr.fObjPtr}};
58 return ROOT::RDF::RResultPtr<T>(copiedResult, inptr.fLoopManager,
59 inptr.fActionPtr->CloneAction(reinterpret_cast<void *>(&copiedResult)));
60}
61
63/**
64 * \brief Creates a new RResultPtr with a cloned Snapshot action.
65 *
66 * \param inptr The pointer.
67 * \param outputFileName A new name for the output file of the cloned action.
68 * \return A new pointer with a cloned action.
69 *
70 * This overload is needed since cloning a Snapshot node usually also involves
71 * changing the name of the output file, otherwise the cloned Snapshot would
72 * overwrite the same file.
73 */
75} // namespace RDF
76} // namespace Internal
77
78namespace Detail {
79namespace RDF {
81// Fwd decl for RResultPtr
82template <typename T>
83RResultPtr<T> MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &df,
84 std::shared_ptr<ROOT::Internal::RDF::RActionBase> actionPtr);
85
86// Fwd decl for GetMergeableValue
87template <typename T>
88class RMergeableValue;
89
90template <typename T>
91std::unique_ptr<RMergeableValue<T>> GetMergeableValue(RResultPtr<T> &rptr);
92} // namespace RDF
93} // namespace Detail
94namespace RDF {
97namespace TTraits = ROOT::TypeTraits;
98
99/// Smart pointer for the return type of actions.
100/**
101\class ROOT::RDF::RResultPtr
102\ingroup dataframe
103\brief A wrapper around the result of RDataFrame actions able to trigger calculations lazily.
104\tparam T Type of the action result
105
106A wrapper around a shared_ptr which allows to access the result of RDataFrame actions.
107The underlying object can be accessed by dereferencing the RResultPtr:
108~~~{.cpp}
109ROOT::RDF::RResultPtr<TH1D> histo = rdf.Histo1D(...);
110histo->Draw(); // Starts running the event loop
111~~~
112Upon invocation of the arrow operator or dereferencing (`operator*`), the
113loop on the events and calculations of all scheduled actions are executed
114if needed.
115It is possible to iterate on the result proxy if the proxied object is a collection.
116~~~{.cpp}
117for (auto& myItem : myResultProxy) { ... };
118~~~
119If iteration is not supported by the type of the proxied object, compilation fails.
120
121When shared ownership to the result is desired, a copy of the underlying shared_ptr can be obtained:
122~~~{.cpp}
123std::shared_ptr<TH1D> ProduceResult(const char *columnname) {
124 auto ht = rdf.Histo1D(*h, columnname);
125 return ht.GetSharedPtr();
126}
127~~~
128Note that this will run the event loop. If this is not desired, the RResultPtr can be copied.
129*/
130template <typename T>
132 // friend declarations
133 template <typename T1>
134 friend class RResultPtr;
135
136 template <typename T1>
138 std::shared_ptr<RDFInternal::RActionBase>);
139
140 template <typename T1>
142
143 template <class T1, class T2>
144 friend bool operator==(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs);
145 template <class T1, class T2>
146 friend bool operator!=(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs);
147 template <class T1>
148 friend bool operator==(const RResultPtr<T1> &lhs, std::nullptr_t rhs);
149 template <class T1>
150 friend bool operator==(std::nullptr_t lhs, const RResultPtr<T1> &rhs);
151 template <class T1>
152 friend bool operator!=(const RResultPtr<T1> &lhs, std::nullptr_t rhs);
153 template <class T1>
154 friend bool operator!=(std::nullptr_t lhs, const RResultPtr<T1> &rhs);
155 friend std::unique_ptr<RDFDetail::RMergeableValue<T>> RDFDetail::GetMergeableValue<T>(RResultPtr<T> &rptr);
156
158
159 friend class RResultHandle;
160
161 friend RResultPtr<T> ROOT::Internal::RDF::CloneResultAndAction<T>(const RResultPtr<T> &inptr);
164 const std::string &outputFileName);
165 /// \cond HIDDEN_SYMBOLS
167 struct RIterationHelper {
168 using Iterator_t = void;
169 void GetBegin(const V &) { static_assert(sizeof(V) == 0, "It does not make sense to ask begin for this class."); }
170 void GetEnd(const V &) { static_assert(sizeof(V) == 0, "It does not make sense to ask end for this class."); }
171 };
172
// Partial specialization of RIterationHelper chosen when the boolean template argument is `true`.
// Iterator_t is the type std::begin yields for a V; GetBegin/GetEnd simply forward to std::begin/std::end.
173 template <typename V>
174 struct RIterationHelper<V, true> {
175 using Iterator_t = decltype(std::begin(std::declval<V>()));
176 static Iterator_t GetBegin(const V &v) { return std::begin(v); };
177 static Iterator_t GetEnd(const V &v) { return std::end(v); };
178 };
179 /// \endcond
180
181 /// Non-owning pointer to the RLoopManager at the root of this computation graph.
182 /// The RLoopManager is guaranteed to be always in scope if fLoopManager is not a nullptr.
184 std::shared_ptr<T> fObjPtr; ///< Shared pointer encapsulating the wrapped result
185 /// Owning pointer to the action that will produce this result.
186 /// Ownership is shared with other copies of this ResultPtr.
187 std::shared_ptr<RDFInternal::RActionBase> fActionPtr;
188
189 /// Triggers the event loop in the RLoopManager
190 void TriggerRun();
191
193 {
194 if (fObjPtr == nullptr)
195 throw std::runtime_error("Trying to access the contents of a null RResultPtr.");
196 }
197
199 std::shared_ptr<RDFInternal::RActionBase> actionPtr)
201 {
202 }
203
204public:
205 using Value_t = T; ///< Convenience alias to simplify access to proxied type
206 static constexpr ULong64_t kOnce = 0ull; ///< Convenience definition to express a callback must be executed once
207
208 RResultPtr() = default;
209 RResultPtr(const RResultPtr &) = default;
210 RResultPtr(RResultPtr &&) = default;
211 RResultPtr &operator=(const RResultPtr &) = default;
/// Returns true if this RResultPtr holds a non-null result pointer (fObjPtr). Only the pointer is inspected.
213 explicit operator bool() const { return bool(fObjPtr); }
214
215 /// Convert a RResultPtr<T2> to a RResultPtr<T>.
216 ///
217 /// Useful e.g. to store a number of RResultPtr<TH1D> and RResultPtr<TH2D> in a std::vector<RResultPtr<TH1>>.
218 /// The requirements on T2 and T are the same as for conversion between std::shared_ptr<T2> and std::shared_ptr<T>.
219 template <typename T2,
220 std::enable_if_t<std::is_constructible<std::shared_ptr<T>, std::shared_ptr<T2>>::value, int> = 0>
224
225 /// Produce the encapsulated result, and return a shared pointer to it.
226 /// If RDataFrame hasn't produced the result yet, triggers the event loop and execution
227 /// of all actions booked in the associated RLoopManager.
228 /// \note To share a "lazy" handle to the result without running the event loop, copy the RResultPtr.
229 std::shared_ptr<T> GetSharedPtr()
230 {
// Run the event loop only when an action is attached and it has not run yet;
// with no action attached (fActionPtr == nullptr) the stored pointer is returned as is.
231 if (fActionPtr != nullptr && !fActionPtr->HasRun())
232 TriggerRun();
// A copy of the shared_ptr is returned, so the caller shares ownership of the result.
233 return fObjPtr;
234 }
235
236 /// Get a const reference to the encapsulated object.
237 /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
238 const T &GetValue()
239 {
// Refuse to dereference an empty RResultPtr: ThrowIfNull() throws std::runtime_error when fObjPtr is null.
240 ThrowIfNull();
// GetSharedPtr() triggers the event loop if the action has not run yet, then the result is dereferenced.
241 return *GetSharedPtr();
242 }
243
244 /// Get the pointer to the encapsulated object.
245 /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
246 /// \note Ownership is not transferred to the caller.
/// \note Unlike GetValue(), no null check is performed here: the raw pointer held by GetSharedPtr() is returned, possibly nullptr.
247 T *GetPtr() { return GetSharedPtr().get(); }
248
249 /// Get a reference to the encapsulated object.
250 /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
252 {
253 ThrowIfNull();
254 return *GetSharedPtr();
255 }
256
257 /// Get a pointer to the encapsulated object.
258 /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
259 /// \note Ownership is not transferred to the caller.
261 {
262 ThrowIfNull();
263 return GetSharedPtr().get();
264 }
265
266 /// Return an iterator to the beginning of the contained object if this makes
267 /// sense, throw a compilation error otherwise
269 {
270 ThrowIfNull();
271 if (!fActionPtr->HasRun())
272 TriggerRun();
274 }
275
276 /// Return an iterator to the end of the contained object if this makes
277 /// sense, throw a compilation error otherwise
279 {
280 ThrowIfNull();
281 if (!fActionPtr->HasRun())
282 TriggerRun();
284 }
285
286 // clang-format off
287 /// Register a callback that RDataFrame will execute "everyNEvents" on a partial result.
288 ///
289 /// \param[in] everyNEvents Frequency at which the callback will be called, as a number of events processed
290 /// \param[in] callback a callable with signature `void(Value_t&)` where Value_t is the type of the value contained in this RResultPtr
291 /// \return this RResultPtr, to allow chaining of OnPartialResult with other calls
292 ///
293 /// The callback must be a callable (lambda, function, functor class...) that takes a reference to the result type as
294 /// argument and returns nothing. RDataFrame will invoke registered callbacks passing partial action results as
295 /// arguments to them (e.g. a histogram filled with a part of the selected events, a counter incremented only up to a
296 /// certain point, a mean over a subset of the events and so forth).
297 ///
298 /// Callbacks can be used e.g. to inspect partial results of the analysis while the event loop is running. For
299 /// example one can draw an up-to-date version of a result histogram every 100 entries like this:
300 /// \code{.cpp}
301 /// auto h = tdf.Histo1D("x");
302 /// TCanvas c("c","x hist");
303 /// h.OnPartialResult(100, [&c](TH1D &h_) { c.cd(); h_.Draw(); c.Update(); });
304 /// h->Draw(); // event loop runs here, this `Draw` is executed after the event loop is finished
305 /// \endcode
306 ///
307 /// A value of 0 for everyNEvents indicates the callback must be executed only once, before running the event loop.
308 /// A convenience definition `kOnce` is provided to make this fact more expressive in user code (see snippet below).
309 /// Multiple callbacks can be registered with the same RResultPtr (i.e. results of RDataFrame actions) and will
310 /// be executed sequentially. Callbacks are executed in the order they were registered.
311 /// The type of the value contained in a RResultPtr is also available as RResultPtr<T>::Value_t, e.g.
312 /// \code{.cpp}
313 /// auto h = tdf.Histo1D("x");
314 /// // h.kOnce is 0
315 /// // decltype(h)::Value_t is TH1D
316 /// \endcode
317 ///
318 /// When implicit multi-threading is enabled, the callback:
319 /// - will never be executed by multiple threads concurrently: it needs not be thread-safe. For example the snippet
320 /// above that draws the partial histogram on a canvas works seamlessly in multi-thread event loops.
321 /// - will be executed by the first worker that arrives at the callback, and then only be executed when the same result
322 /// is updated. For example, if dataframe uses N internal copies of a result, it will always be the `i`th < N object
323 /// that is passed into the callback.
324 /// - will always be executed "everyNEvents": the partial result passed into the callback will have accumulated N more
325 /// events, irrespective of the progress that other worker threads make.
326 /// - might be executed by a different worker thread at different times: the value of `std::this_thread::get_id()`
327 /// might change between calls.
328 ///
329 /// To register a callback that is called by _each_ worker thread (concurrently) every N events one can use
330 /// OnPartialResultSlot().
331 // clang-format on
332 RResultPtr<T> &OnPartialResult(ULong64_t everyNEvents, std::function<void(T &)> callback)
333 {
334 ThrowIfNull();
335 auto actionPtr = fActionPtr;
336 constexpr auto kUninit = std::numeric_limits<unsigned int>::max();
337 auto activeSlot = std::make_shared<std::atomic_uint>(kUninit);
338 auto c = [=](unsigned int slot) {
339 if (activeSlot->load() == kUninit) {
340 // Try to grab the right to run the callback for our slot:
341 unsigned int expected = kUninit;
342 activeSlot->compare_exchange_strong(expected, slot);
343 }
344 if (activeSlot->load() != slot)
345 return;
346
347 auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot));
348 callback(*partialResult);
349 };
351 return *this;
352 }
353
354 // clang-format off
355 /// Register a callback that RDataFrame will execute in each worker thread concurrently on that thread's partial result.
356 ///
357 /// \param[in] everyNEvents Frequency at which the callback will be called by each thread, as a number of events processed
358 /// \param[in] callback A callable with signature `void(unsigned int, Value_t&)` where Value_t is the type of the value contained in this RResultPtr
359 /// \return this RResultPtr, to allow chaining of OnPartialResultSlot with other calls
360 ///
361 /// See `OnPartialResult` for a generic explanation of the callback mechanism.
362 /// Compared to `OnPartialResult`, this method has the following differences:
363 /// - all worker threads invoke the callback once every specified number of events. The event count is per-thread,
364 /// and callback invocation might happen concurrently (i.e. the callback must be thread-safe)
365 /// - the callable must take an extra `unsigned int` parameter corresponding to a multi-thread "processing slot":
366 /// this is a "helper value" to simplify writing thread-safe callbacks: different worker threads might invoke the
367 /// callback concurrently but always with different `slot` numbers.
368 /// - a value of 0 for everyNEvents indicates the callback must be executed once _per slot_.
369 ///
370 /// For example, the following snippet prints out a thread-safe progress bar of the events processed by RDataFrame
371 /// \code
372 /// auto c = tdf.Count(); // any action would do, but `Count` is the most lightweight
373 /// std::string progress;
374 /// std::mutex bar_mutex;
375 /// c.OnPartialResultSlot(nEvents / 100, [&progress, &bar_mutex](unsigned int, ULong64_t &) {
376 /// std::lock_guard<std::mutex> lg(bar_mutex);
377 /// progress.push_back('#');
378 /// std::cout << "\r[" << std::left << std::setw(100) << progress << ']' << std::flush;
379 /// });
380 /// std::cout << "Analysis running..." << std::endl;
381 /// *c; // trigger the event loop by accessing an action's result
382 /// std::cout << "\nDone!" << std::endl;
383 /// \endcode
384 // clang-format on
385 RResultPtr<T> &OnPartialResultSlot(ULong64_t everyNEvents, std::function<void(unsigned int, T &)> callback)
386 {
387 ThrowIfNull();
388 auto actionPtr = fActionPtr;
389 auto c = [actionPtr, callback](unsigned int slot) {
390 auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot));
391 callback(slot, *partialResult);
392 };
394 return *this;
395 }
396
397 // clang-format off
398 /// Check whether the result has already been computed
399 ///
400 /// ~~~{.cpp}
401 /// auto res = df.Count();
402 /// res.IsReady(); // false, access will trigger event loop
403 /// std::cout << *res << std::endl; // triggers event loop
404 /// res.IsReady(); // true
405 /// ~~~
406 // clang-format on
407 bool IsReady() const
408 {
// An RResultPtr with no associated action is never reported as ready.
409 if (fActionPtr == nullptr)
410 return false;
// Otherwise readiness is whatever the action reports; this query itself does not call TriggerRun().
411 return fActionPtr->HasRun();
412 }
413};
414
415template <typename T>
417{
418 fLoopManager->Run();
419}
420
421template <class T1, class T2>
423{
424 return lhs.fObjPtr == rhs.fObjPtr;
425}
426
427template <class T1, class T2>
429{
430 return lhs.fObjPtr != rhs.fObjPtr;
431}
432
/// Returns true if the result pointer held by `lhs` (fObjPtr) is null.
/// Only the stored pointer is compared; this function does not call TriggerRun().
433template <class T1>
434bool operator==(const RResultPtr<T1> &lhs, std::nullptr_t rhs)
435{
436 return lhs.fObjPtr == rhs;
437}
438
/// Mirror of the (RResultPtr, nullptr) comparison with the operands swapped:
/// returns true if the result pointer held by `rhs` (fObjPtr) is null.
439template <class T1>
440bool operator==(std::nullptr_t lhs, const RResultPtr<T1> &rhs)
441{
442 return lhs == rhs.fObjPtr;
443}
444
/// Returns true if the result pointer held by `lhs` (fObjPtr) is not null.
/// Only the stored pointer is compared; this function does not call TriggerRun().
445template <class T1>
446bool operator!=(const RResultPtr<T1> &lhs, std::nullptr_t rhs)
447{
448 return lhs.fObjPtr != rhs;
449}
450
/// Mirror of the (RResultPtr, nullptr) inequality with the operands swapped:
/// returns true if the result pointer held by `rhs` (fObjPtr) is not null.
451template <class T1>
452bool operator!=(std::nullptr_t lhs, const RResultPtr<T1> &rhs)
453{
454 return lhs != rhs.fObjPtr;
455}
456
457} // namespace RDF
458
459namespace Detail {
460namespace RDF {
461/// Create a RResultPtr and set its pointer to the corresponding RAction.
462/// This overload is invoked by non-jitted actions, as they have access to RAction before constructing RResultPtr.
463template <typename T>
465MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &lm, std::shared_ptr<RDFInternal::RActionBase> actionPtr)
466{
467 return RResultPtr<T>(r, &lm, std::move(actionPtr));
468}
469
470////////////////////////////////////////////////////////////////////////////////
471/// \brief Retrieve a mergeable value from an RDataFrame action.
472/// \param[in] rptr lvalue reference of an RResultPtr object.
473/// \returns An RMergeableValue holding the result of the action, wrapped in an
474/// `std::unique_ptr`.
475///
476/// This function triggers the execution of the RDataFrame computation graph.
477/// Then retrieves an RMergeableValue object created with the result wrapped by
478/// the RResultPtr argument. The user obtains ownership of the mergeable, which
479/// in turn holds a copy of the result of the action. The RResultPtr is not
480/// destroyed in the process and will still retain (shared) ownership of the
481/// original result.
482///
483/// Example usage:
484/// ~~~{.cpp}
485/// using namespace ROOT::Detail::RDF;
486/// ROOT::RDataFrame d("myTree", "file_*.root");
487/// auto h = d.Histo1D("Branch_A");
488/// auto mergeablehisto = GetMergeableValue(h);
489/// ~~~
490template <typename T>
491std::unique_ptr<RMergeableValue<T>> GetMergeableValue(RResultPtr<T> &rptr)
492{
// Reject an RResultPtr that holds no result before touching its action pointer.
493 rptr.ThrowIfNull();
// Make sure the action has produced its result before asking it for a mergeable value.
494 if (!rptr.fActionPtr->HasRun())
495 rptr.TriggerRun(); // Prevents from using `const` specifier in parameter
// The action hands back an owning handle whose raw pointer is released and re-wrapped as
// RMergeableValue<T>; presumably the action returns a pointer to a less specific type
// (hence the static_cast) -- NOTE(review): confirm against RActionBase::GetMergeableValue.
496 return std::unique_ptr<RMergeableValue<T>>{
497 static_cast<RMergeableValue<T> *>(rptr.fActionPtr->GetMergeableValue().release())};
498}
499} // namespace RDF
500} // namespace Detail
501} // namespace ROOT
502
503#endif // ROOT_RRESULTPTR
#define c(i)
Definition RSha256.hxx:101
unsigned long long ULong64_t
Definition RtypesCore.h:70
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Bool_t operator!=(const TDatime &d1, const TDatime &d2)
Definition TDatime.h:104
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
The head node of a RDF computation graph.
void Run(bool jit=true)
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void RegisterCallback(ULong64_t everyNEvents, std::function< void(unsigned int)> &&f)
Helper class that provides the operation graph nodes.
A type-erased version of RResultPtr and RResultMap.
Smart pointer for the return type of actions.
bool IsReady() const
Check whether the result has already been computed.
void TriggerRun()
Triggers the event loop in the RLoopManager.
RResultPtr(RResultPtr &&)=default
T * GetPtr()
Get the pointer to the encapsulated object.
std::shared_ptr< T > fObjPtr
Shared pointer encapsulating the wrapped result.
RResultPtr< T > & OnPartialResult(ULong64_t everyNEvents, std::function< void(T &)> callback)
Register a callback that RDataFrame will execute "everyNEvents" on a partial result.
RDFDetail::RLoopManager * fLoopManager
Non-owning pointer to the RLoopManager at the root of this computation graph.
friend bool operator!=(const RResultPtr< T1 > &lhs, const RResultPtr< T2 > &rhs)
RIterationHelper< T >::Iterator_t begin()
Return an iterator to the beginning of the contained object if this makes sense, throw a compilation ...
T Value_t
Convenience alias to simplify access to proxied type.
const T & GetValue()
Get a const reference to the encapsulated object.
std::shared_ptr< T > GetSharedPtr()
Produce the encapsulated result, and return a shared pointer to it.
RResultPtr(const RResultPtr &)=default
T & operator*()
Get a reference to the encapsulated object.
RResultPtr & operator=(const RResultPtr &)=default
RResultPtr< T > & OnPartialResultSlot(ULong64_t everyNEvents, std::function< void(unsigned int, T &)> callback)
Register a callback that RDataFrame will execute in each worker thread concurrently on that thread's ...
RResultPtr & operator=(RResultPtr &&)=default
friend bool operator==(const RResultPtr< T1 > &lhs, const RResultPtr< T2 > &rhs)
RResultPtr(const RResultPtr< T2 > &r)
Convert a RResultPtr<T2> to a RResultPtr<T>.
std::shared_ptr< RDFInternal::RActionBase > fActionPtr
Owning pointer to the action that will produce this result.
T * operator->()
Get a pointer to the encapsulated object.
static constexpr ULong64_t kOnce
Convenience definition to express a callback must be executed once.
RResultPtr(std::shared_ptr< T > objPtr, RDFDetail::RLoopManager *lm, std::shared_ptr< RDFInternal::RActionBase > actionPtr)
RIterationHelper< T >::Iterator_t end()
Return an iterator to the end of the contained object if this makes sense, throw a compilation error ...
#define T2
Definition md5.inl:147
std::unique_ptr< RMergeableVariations< T > > GetMergeableValue(ROOT::RDF::Experimental::RResultMap< T > &rmap)
Retrieve mergeable values after calling ROOT::RDF::VariationsFor .
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create a RResultPtr and set its pointer to the corresponding RAction This overload is invoked by non-...
ROOT::RDF::Experimental::RResultMap< T > CloneResultAndAction(const ROOT::RDF::Experimental::RResultMap< T > &inmap)
Clones an RResultMap and its corresponding RVariedAction.
bool operator==(const RConcurrentHashColl::HashValue &lhs, const RConcurrentHashColl::HashValue &rhs)
RResultMap< T > VariationsFor(RResultPtr< T > resPtr)
Produce all required systematic variations for the given result.
ROOT type_traits extensions.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...