Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooAbsDataHelper.h
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitCore *
4 * Authors: *
5 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
6 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
7 * *
8 * Copyright (c) 2000-2021, Regents of the University of California *
9 * and Stanford University. All rights reserved. *
10 * *
11 * Redistribution and use in source and binary forms, *
12 * with or without modification, are permitted according to the terms *
13 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
14 *****************************************************************************/
15/// Create RooDataSet/RooDataHist from RDataFrame.
16/// \date Mar 2021
17/// \author Stephan Hageboeck (CERN)
18#ifndef ROOABSDATAHELPER
19#define ROOABSDATAHELPER
20
21#include <RooRealVar.h>
22#include <RooArgSet.h>
23#include <RooDataSet.h>
24#include <RooDataHist.h>
25#include <RooMsgService.h>
26
27#include <ROOT/RDataFrame.hxx>
29#include <TROOT.h>
30
31#include <vector>
32#include <mutex>
33#include <memory>
34#include <cstddef>
35#include <string>
36#include <stdexcept>
37
38class TTreeReader;
39
40/// This is a helper for an RDataFrame action, which fills RooFit data classes.
41///
42/// \tparam DataSet_t Either RooDataSet or RooDataHist.
43///
44/// To construct RooDataSet / RooDataHist within RDataFrame
45/// - Construct one of the two action helpers RooDataSetHelper or RooDataHistHelper. Pass constructor arguments
46/// to RooAbsDataHelper::RooAbsDataHelper() as for the original classes.
47/// The arguments are forwarded to the actual data classes without any changes.
48/// - Book the helper as an RDataFrame action. Here, the RDataFrame column types have to be passed as template parameters.
49/// - Pass the column names to the Book action. These are matched by position to the variables of the dataset.
50///
51/// All arguments passed to the helper's constructor are forwarded to RooDataSet::RooDataSet() / RooDataHist::RooDataHist().
52///
53/// #### Usage example:
54/// ```
55/// RooRealVar x("x", "x", -5., 5.);
56/// RooRealVar y("y", "y", -50., 50.);
57/// auto myDataSet = rdataframe.Book<double, double>(
58/// RooDataSetHelper{"dataset", // Name (directly forwarded to RooDataSet::RooDataSet())
59/// "Title of dataset", // Title ( ~ " ~ )
60/// RooArgSet(x, y) }, // Variables to create in dataset
61/// {"x", "y"} // Column names from RDataFrame
62/// );
63///
64/// ```
65/// \warning Variables in the dataset and columns in RDataFrame are **matched by position, not by name**.
66/// This enables the easy exchanging of columns that should be filled into the dataset.
67template<class DataSet_t>
68class RooAbsDataHelper : public ROOT::Detail::RDF::RActionImpl<RooAbsDataHelper<DataSet_t>> {
69public:
70 using Result_t = DataSet_t;
71
72private:
73 std::shared_ptr<DataSet_t> _dataset;
74 std::mutex _mutex_dataset;
75 std::size_t _numInvalid = 0;
76
77 std::vector<std::vector<double>> _events; // One vector of values per data-processing slot
78 const std::size_t _eventSize; // Number of variables in dataset
79
80public:
81
82 /// Construct a helper to create RooDataSet/RooDataHist.
83 /// \tparam Args_t Parameter pack of arguments.
84 /// \param args Constructor arguments for RooDataSet::RooDataSet() or RooDataHist::RooDataHist().
85 /// All arguments will be forwarded as they are.
86 template<typename... Args_t>
87 RooAbsDataHelper(Args_t&&... args) :
88 _dataset{ new DataSet_t(std::forward<Args_t>(args)...) },
89 _eventSize{ _dataset->get()->size() }
90 {
91 const auto nSlots = ROOT::IsImplicitMTEnabled() ? ROOT::GetThreadPoolSize() : 1;
92 _events.resize(nSlots);
93 }
94
95
96 /// Move constructor. It transfers ownership of the internal RooAbsData object.
98 _dataset{ std::move(other._dataset) },
100 _events{ std::move(other._events) },
101 _eventSize{ other._eventSize }
102 {
103
104 }
105
106 /// Copy is discouraged.
107 /// Use `rdataframe.Book<...>(std::move(absDataHelper), ...)` instead.
109 /// Return internal dataset/hist.
110 std::shared_ptr<DataSet_t> GetResultPtr() const { return _dataset; }
111 /// RDataFrame interface method. Nothing has to be initialised.
112 void Initialize() {}
113 /// RDataFrame interface method. No tasks.
114 void InitTask(TTreeReader *, unsigned int) {}
115 /// RDataFrame interface method.
116 std::string GetActionName() { return "RooDataSetHelper"; }
117
118 /// Method that RDataFrame calls to pass a new event.
119 ///
120 /// \param slot When IMT is used, this is a number in the range [0, nSlots) to fill lock free.
121 /// \param values x, y, z, ... coordinates of the event.
122 template <typename... ColumnTypes>
123 void Exec(unsigned int slot, ColumnTypes... values)
124 {
125 if (sizeof...(values) != _eventSize) {
126 throw std::invalid_argument(std::string("RooDataSet can hold ")
127 + std::to_string(_eventSize)
128 + " variables per event, but RDataFrame passed "
129 + std::to_string(sizeof...(values))
130 + " columns.");
131 }
132
133 auto& vector = _events[slot];
134 for (auto&& val : {values...}) {
135 vector.push_back(val);
136 }
137
138 if (vector.size() > 1024 && _mutex_dataset.try_lock()) {
139 const std::lock_guard<std::mutex> guard(_mutex_dataset, std::adopt_lock_t());
140 FillDataSet(vector, _eventSize);
141 vector.clear();
142 }
143 }
144
145 /// Empty all buffers into the dataset/hist to finish processing.
146 void Finalize() {
147 for (auto& vector : _events) {
148 FillDataSet(vector, _eventSize);
149 vector.clear();
150 }
151
152 if (_numInvalid>0) {
153 const auto prefix = std::string(_dataset->ClassName()) + "Helper::Finalize(" + _dataset->GetName() + ") ";
154 oocoutW(static_cast<TObject*>(nullptr), DataHandling) << prefix << "Ignored " << _numInvalid << " out-of-range events\n";
155 }
156 }
157
158
159private:
160 /// Append all `events` to the internal RooDataSet or increment the bins of a RooDataHist at the given locations.
161 ///
162 /// \param events Events to fill into `data`. The layout is assumed to be `(x, y, z, ...) (x, y, z, ...), (...)`.
163 /// \note The order of the variables inside `events` must be consistent with the order given in the constructor.
164 /// No matching by name is performed.
165 /// \param eventSize Size of a single event.
166 void FillDataSet(const std::vector<double>& events, unsigned int eventSize) {
167 if (events.size() == 0)
168 return;
169
170 const RooArgSet& argSet = *_dataset->get();
171
172 for (std::size_t i = 0; i < events.size(); i += eventSize) {
173
174 // Creating a RooDataSet from an RDataFrame should be consistent with the
175 // creation from a TTree. The construction from a TTree discards entries
176 // outside the variable definition range, so we have to do that too (see
177 // also RooTreeDataStore::loadValues).
178
179 bool allOK = true;
180 for (std::size_t j=0; j < eventSize; ++j) {
181 auto * destArg = static_cast<RooAbsRealLValue*>(argSet[j]);
182 double sourceVal = events[i+j];
183
184 if (!destArg->inRange(sourceVal, nullptr)) {
185 _numInvalid++ ;
186 allOK = false;
187 const auto prefix = std::string(_dataset->ClassName()) + "Helper::FillDataSet(" + _dataset->GetName() + ") ";
188 if (_numInvalid < 5) {
189 // Unlike in the TreeVectorStore case, we don't log the event
190 // number here because we don't know it anyway, because of
191 // RDataFrame slots and multithreading.
192 oocoutI(static_cast<TObject*>(nullptr), DataHandling) << prefix << "Skipping event because " << destArg->GetName()
193 << " cannot accommodate the value " << sourceVal << "\n";
194 } else if (_numInvalid == 5) {
195 oocoutI(static_cast<TObject*>(nullptr), DataHandling) << prefix << "Skipping ...\n";
196 }
197 break ;
198 }
199 destArg->setVal(sourceVal);
200 }
201 if(allOK) {
202 _dataset->add(argSet);
203 }
204 }
205 }
206};
207
208/// Helper for creating a RooDataSet inside RDataFrame. \see RooAbsDataHelper
210/// Helper for creating a RooDataHist inside RDataFrame. \see RooAbsDataHelper
212
213#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define oocoutW(o, a)
#define oocoutI(o, a)
Storage_t const & get() const
Const access to the underlying stl container.
This is a helper for an RDataFrame action, which fills RooFit data classes.
void Exec(unsigned int slot, ColumnTypes... values)
Method that RDataFrame calls to pass a new event.
const std::size_t _eventSize
std::size_t _numInvalid
void Initialize()
RDataFrame interface method. Nothing has to be initialised.
std::mutex _mutex_dataset
std::shared_ptr< DataSet_t > GetResultPtr() const
Return internal dataset/hist.
std::string GetActionName()
RDataFrame interface method.
std::vector< std::vector< double > > _events
RooAbsDataHelper(Args_t &&... args)
Construct a helper to create RooDataSet/RooDataHist.
void Finalize()
Empty all buffers into the dataset/hist to finish processing.
RooAbsDataHelper(const RooAbsDataHelper &)=delete
Copy is discouraged.
void FillDataSet(const std::vector< double > &events, unsigned int eventSize)
Append all events to the internal RooDataSet or increment the bins of a RooDataHist at the given loca...
void InitTask(TTreeReader *, unsigned int)
RDataFrame interface method. No tasks.
std::shared_ptr< DataSet_t > _dataset
RooAbsDataHelper(RooAbsDataHelper &&other)
Move constructor. It transfers ownership of the internal RooAbsData object.
RooAbsRealLValue is the common abstract base class for objects that represent a real value that may a...
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:35
Mother of all ROOT objects.
Definition TObject.h:41
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:44
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:558
UInt_t GetThreadPoolSize()
Returns the size of ROOT's thread pool.
Definition TROOT.cxx:565