Logo ROOT   6.18/05
Reference Guide
RLazyDSImpl.hxx
Go to the documentation of this file.
// Author: Enrico Guiraud, Danilo Piparo CERN  02/2018

/*************************************************************************
 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/
10
11#ifndef ROOT_RLAZYDSIMPL
12#define ROOT_RLAZYDSIMPL
13
#include "ROOT/RMakeUnique.hxx"
#include "ROOT/RDataSource.hxx"
#include "ROOT/RResultPtr.hxx"
#include "ROOT/TSeq.hxx"

#include <algorithm>
#include <cstddef>
#include <initializer_list>
#include <iterator>
#include <map>
#include <stdexcept>
#include <string>
#include <tuple>
#include <typeinfo>
#include <utility>
#include <vector>
26
27namespace ROOT {
28
29namespace RDF {
30////////////////////////////////////////////////////////////////////////////////////////////////
31/// \brief A RDataSource implementation which is built on top of result proxies
32///
33/// This component allows to create a data source on a set of columns coming from
34/// one or multiple data frames. The processing of the parent data frames starts
35/// only when the event loop is triggered in the data frame initialised with a
36/// RLazyDS.
37///
38/// The implementation takes care of matching compile time information with runtime
39/// information, e.g. expanding in a smart way the template parameters packs.
40template <typename... ColumnTypes>
41class RLazyDS final : public ROOT::RDF::RDataSource {
42 using PointerHolderPtrs_t = std::vector<ROOT::Internal::TDS::TPointerHolder *>;
43
44 std::tuple<RResultPtr<std::vector<ColumnTypes>>...> fColumns;
45 const std::vector<std::string> fColNames;
46 const std::map<std::string, std::string> fColTypesMap;
47 // The role of the fPointerHoldersModels is to be initialised with the pack
48 // of arguments in the constrcutor signature at construction time
49 // Once the number of slots is known, the fPointerHolders are initialised
50 // according to the models.
52 std::vector<PointerHolderPtrs_t> fPointerHolders;
53 std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges{};
54 unsigned int fNSlots{0};
55
56 Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id)
57 {
58 auto colNameStr = std::string(colName);
59 // This could be optimised and done statically
60 const auto idName = ROOT::Internal::RDF::TypeID2TypeName(id);
61 auto it = fColTypesMap.find(colNameStr);
62 if (fColTypesMap.end() == it) {
63 std::string err = "The specified column name, \"" + colNameStr + "\" is not known to the data source.";
64 throw std::runtime_error(err);
65 }
66
67 const auto colIdName = it->second;
68 if (colIdName != idName) {
69 std::string err = "Column " + colNameStr + " has type " + colIdName +
70 " while the id specified is associated to type " + idName;
71 throw std::runtime_error(err);
72 }
73
74 const auto colBegin = fColNames.begin();
75 const auto colEnd = fColNames.end();
76 const auto namesIt = std::find(colBegin, colEnd, colName);
77 const auto index = std::distance(colBegin, namesIt);
78
79 Record_t ret(fNSlots);
80 for (auto slot : ROOT::TSeqU(fNSlots)) {
81 ret[slot] = fPointerHolders[index][slot]->GetPointerAddr();
82 }
83 return ret;
84 }
85
86 size_t GetEntriesNumber() { return std::get<0>(fColumns)->size(); }
87 template <std::size_t... S>
88 void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence<S...>)
89 {
90 std::initializer_list<int> expander{
91 (*static_cast<ColumnTypes *>(fPointerHolders[S][slot]->GetPointer()) = (*std::get<S>(fColumns))[entry], 0)...};
92 (void)expander; // avoid unused variable warnings
93 }
94
95 template <std::size_t... S>
96 void ColLenghtChecker(std::index_sequence<S...>)
97 {
98 if (sizeof...(S) < 2)
99 return;
100
101 const std::vector<size_t> colLengths{std::get<S>(fColumns)->size()...};
102 const auto expectedLen = colLengths[0];
103 std::string err;
104 for (auto i : TSeqI(1, colLengths.size())) {
105 if (expectedLen != colLengths[i]) {
106 err += "Column \"" + fColNames[i] + "\" and column \"" + fColNames[0] +
107 "\" have different lengths: " + std::to_string(expectedLen) + " and " +
108 std::to_string(colLengths[i]);
109 }
110 }
111 if (!err.empty()) {
112 throw std::runtime_error(err);
113 }
114 }
115
116protected:
117 std::string AsString() { return "lazy data source"; };
118
119public:
120 RLazyDS(std::pair<std::string, RResultPtr<std::vector<ColumnTypes>>>... colsNameVals)
121 : fColumns(std::tuple<RResultPtr<std::vector<ColumnTypes>>...>(colsNameVals.second...)),
122 fColNames({colsNameVals.first...}),
123 fColTypesMap({{colsNameVals.first, ROOT::Internal::RDF::TypeID2TypeName(typeid(ColumnTypes))}...}),
125 {
126 }
127
129 {
130 for (auto &&ptrHolderv : fPointerHolders) {
131 for (auto &&ptrHolder : ptrHolderv) {
132 delete ptrHolder;
133 }
134 }
135 }
136
137 const std::vector<std::string> &GetColumnNames() const { return fColNames; }
138
139 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges()
140 {
141 auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
142 return entryRanges;
143 }
144
145 std::string GetTypeName(std::string_view colName) const
146 {
147 const auto key = std::string(colName);
148 return fColTypesMap.at(key);
149 }
150
151 bool HasColumn(std::string_view colName) const
152 {
153 const auto key = std::string(colName);
154 const auto endIt = fColTypesMap.end();
155 return endIt != fColTypesMap.find(key);
156 }
157
158 bool SetEntry(unsigned int slot, ULong64_t entry)
159 {
160 SetEntryHelper(slot, entry, std::index_sequence_for<ColumnTypes...>());
161 return true;
162 }
163
164 void SetNSlots(unsigned int nSlots)
165 {
166 fNSlots = nSlots;
167 const auto nCols = fColNames.size();
168 fPointerHolders.resize(nCols); // now we need to fill it with the slots, all of the same type
169 auto colIndex = 0U;
170 for (auto &&ptrHolderv : fPointerHolders) {
171 for (auto slot : ROOT::TSeqI(fNSlots)) {
172 auto ptrHolder = fPointerHoldersModels[colIndex]->GetDeepCopy();
173 ptrHolderv.emplace_back(ptrHolder);
174 (void)slot;
175 }
176 colIndex++;
177 }
178 for (auto &&ptrHolder : fPointerHoldersModels)
179 delete ptrHolder;
180 }
181
183 {
184 ColLenghtChecker(std::index_sequence_for<ColumnTypes...>());
185 const auto nEntries = GetEntriesNumber();
186 const auto nEntriesInRange = nEntries / fNSlots; // between integers. Should make smaller?
187 auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
188 fEntryRanges.resize(fNSlots);
189 auto init = 0ULL;
190 auto end = 0ULL;
191 for (auto &&range : fEntryRanges) {
192 end = init + nEntriesInRange;
193 if (0 != reminder) { // Distribute the reminder among the first chunks
194 reminder--;
195 end += 1;
196 }
197 range.first = init;
198 range.second = end;
199 init = end;
200 }
201 }
202
203 std::string GetLabel() { return "LazyDS"; }
204};
205
206} // ns RDF
207
208} // ns ROOT
209
210#endif
static Int_t init()
unsigned long long ULong64_t
Definition: RtypesCore.h:70
typedef void((*Func_t)())
Class to wrap a pointer and delete the memory associated to it correctly.
Definition: RDataSource.hxx:58
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
A RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
const std::vector< std::string > fColNames
Definition: RLazyDSImpl.hxx:45
std::vector< PointerHolderPtrs_t > fPointerHolders
Definition: RLazyDSImpl.hxx:52
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges()
Return ranges of entries to distribute to tasks.
bool SetEntry(unsigned int slot, ULong64_t entry)
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
void SetNSlots(unsigned int nSlots)
Inform RDataSource of the number of processing slots (i.e.
std::vector< ROOT::Internal::TDS::TPointerHolder * > PointerHolderPtrs_t
Definition: RLazyDSImpl.hxx:42
std::string GetLabel()
Return a string representation of the datasource type.
unsigned int fNSlots
Definition: RLazyDSImpl.hxx:54
void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence< S... >)
Definition: RLazyDSImpl.hxx:88
const PointerHolderPtrs_t fPointerHoldersModels
Definition: RLazyDSImpl.hxx:51
const std::map< std::string, std::string > fColTypesMap
Definition: RLazyDSImpl.hxx:46
Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id)
type-erased vector of pointers to pointers to column values - one per slot
Definition: RLazyDSImpl.hxx:56
const std::vector< std::string > & GetColumnNames() const
Returns a reference to the collection of the dataset's column names.
void Initialise()
Convenience method called before starting an event-loop.
std::string GetTypeName(std::string_view colName) const
Type of a column as a string, e.g.
std::string AsString()
size_t GetEntriesNumber()
Definition: RLazyDSImpl.hxx:86
bool HasColumn(std::string_view colName) const
Checks if the dataset has a certain column.
RLazyDS(std::pair< std::string, RResultPtr< std::vector< ColumnTypes > > >... colsNameVals)
std::tuple< RResultPtr< std::vector< ColumnTypes > >... > fColumns
Definition: RLazyDSImpl.hxx:44
void ColLenghtChecker(std::index_sequence< S... >)
Definition: RLazyDSImpl.hxx:96
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition: RLazyDSImpl.hxx:53
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:72
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
basic_string_view< char > string_view
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:83
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
TSeq< int > TSeqI
Definition: TSeq.hxx:194
RooArgSet S(const RooAbsArg &v1)
static constexpr double second