Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RLazyDSImpl.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RLAZYDSIMPL
12#define ROOT_RLAZYDSIMPL
13
14#include "ROOT/RDataSource.hxx"
15#include "ROOT/RResultPtr.hxx"
16#include "ROOT/TSeq.hxx"
17
18#include <algorithm>
19#include <map>
20#include <memory>
21#include <tuple>
22#include <string>
23#include <typeinfo>
24#include <utility> // std::index_sequence
25#include <vector>
26
27namespace ROOT::Internal::RDF {
35} // namespace ROOT::Internal::RDF
36
37namespace ROOT {
38
39namespace RDF {
40////////////////////////////////////////////////////////////////////////////////////////////////
41/// \brief A RDataSource implementation which is built on top of result proxies
42///
43/// This component allows creating a data source from a set of columns coming from
44/// one or multiple data frames. The processing of the parent data frames starts
45/// only when the event loop is triggered in the data frame initialized with an
46/// RLazyDS.
47///
48/// The implementation takes care of matching compile-time information with runtime
49/// information, e.g. by suitably expanding the template parameter packs.
50template <typename... ColumnTypes>
52 using PointerHolderPtrs_t = std::vector<ROOT::Internal::RDF::TPointerHolder *>;
53
54 std::tuple<RResultPtr<std::vector<ColumnTypes>>...> fColumns;
55 const std::vector<std::string> fColNames;
56 const std::map<std::string, std::string> fColTypesMap;
57 // The role of the fPointerHoldersModels is to be initialized with the pack
58 // of arguments in the constructor signature at construction time.
59 // Once the number of slots is known, the fPointerHolders are initialized
60 // according to the models.
62 std::vector<PointerHolderPtrs_t> fPointerHolders;
63 std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges{};
64
65 Record_t GetColumnReadersImpl(std::string_view, const std::type_info &) final
66 {
67 // We use the new API
68 return {};
69 }
70
71 size_t GetEntriesNumber() { return std::get<0>(fColumns)->size(); }
72 template <std::size_t... S>
73 void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence<S...>)
74 {
75 std::initializer_list<int> expander{
76 (*static_cast<ColumnTypes *>(fPointerHolders[S][slot]->GetPointer()) = (*std::get<S>(fColumns))[entry], 0)...};
77 (void)expander; // avoid unused variable warnings
78 }
79
80 template <std::size_t... S>
81 void ColLengthChecker(std::index_sequence<S...>)
82 {
83 if (sizeof...(S) < 2)
84 return;
85
86 const std::vector<size_t> colLengths{std::get<S>(fColumns)->size()...};
87 const auto expectedLen = colLengths[0];
88 std::string err;
89 for (auto i : TSeqI(1, colLengths.size())) {
90 if (expectedLen != colLengths[i]) {
91 err += "Column \"" + fColNames[i] + "\" and column \"" + fColNames[0] +
92 "\" have different lengths: " + std::to_string(expectedLen) + " and " +
93 std::to_string(colLengths[i]);
94 }
95 }
96 if (!err.empty()) {
97 throw std::runtime_error(err);
98 }
99 }
100
101protected:
102 std::string AsString() final { return "lazy data source"; };
103
104public:
/// Build the data source from one (column name, result pointer) pair per column type.
///
/// \param colsNameVals one pair per entry of ColumnTypes: `first` is the column name,
///                     `second` the RResultPtr to the vector holding that column's values.
// fColumns keeps the result pointers, fColNames the names in pack order, and
// fColTypesMap maps each name to the type name obtained from typeid(ColumnTypes).
// fPointerHoldersModels receives, per column, one heap-allocated typed pointer holder
// wrapping a freshly allocated, value-initialized ColumnTypes object.
105 RLazyDS(std::pair<std::string, RResultPtr<std::vector<ColumnTypes>>>... colsNameVals)
106 : fColumns(std::tuple<RResultPtr<std::vector<ColumnTypes>>...>(colsNameVals.second...)),
107 fColNames({colsNameVals.first...}),
108 fColTypesMap({{colsNameVals.first, ROOT::Internal::RDF::TypeID2TypeName(typeid(ColumnTypes))}...}),
109 fPointerHoldersModels({new ROOT::Internal::RDF::TTypedPointerHolder<ColumnTypes>(new ColumnTypes())...})
110 {
111 }
112
113 // Rule of five
114 RLazyDS(const RLazyDS &) = delete;
115 RLazyDS &operator=(const RLazyDS &) = delete;
116 RLazyDS(RLazyDS &&) = delete;
117 RLazyDS &operator=(RLazyDS &&) = delete;
119 {
120 for (auto &&ptrHolderv : fPointerHolders) {
121 for (auto &&ptrHolder : ptrHolderv) {
122 delete ptrHolder;
123 }
124 }
125 }
126
127 const std::vector<std::string> &GetColumnNames() const final { return fColNames; }
128
129 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final
130 {
131 auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
132 return entryRanges;
133 }
134
135 std::string GetTypeName(std::string_view colName) const final
136 {
137 const auto key = std::string(colName);
138 return fColTypesMap.at(key);
139 }
140
141 bool HasColumn(std::string_view colName) const final
142 {
143 const auto key = std::string(colName);
144 const auto endIt = fColTypesMap.end();
145 return endIt != fColTypesMap.find(key);
146 }
147
148 bool SetEntry(unsigned int slot, ULong64_t entry) final
149 {
150 SetEntryHelper(slot, entry, std::index_sequence_for<ColumnTypes...>());
151 return true;
152 }
153
154 void SetNSlots(unsigned int nSlots) final
155 {
156 fNSlots = nSlots;
157 const auto nCols = fColNames.size();
158 fPointerHolders.resize(nCols); // now we need to fill it with the slots, all of the same type
159 auto colIndex = 0U;
160 for (auto &&ptrHolderv : fPointerHolders) {
161 for (auto slot : ROOT::TSeqI(fNSlots)) {
162 auto ptrHolder = fPointerHoldersModels[colIndex]->GetDeepCopy();
163 ptrHolderv.emplace_back(ptrHolder);
164 (void)slot;
165 }
166 colIndex++;
167 }
168 for (auto &&ptrHolder : fPointerHoldersModels)
169 delete ptrHolder;
170 }
171
173 {
174 ColLengthChecker(std::index_sequence_for<ColumnTypes...>());
175 const auto nEntries = GetEntriesNumber();
176 const auto nEntriesInRange = nEntries / fNSlots; // between integers. Should make smaller?
177 auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
178 fEntryRanges.resize(fNSlots);
179 auto init = 0ULL;
180 auto end = 0ULL;
181 for (auto &&range : fEntryRanges) {
182 end = init + nEntriesInRange;
183 if (0 != reminder) { // Distribute the reminder among the first chunks
184 reminder--;
185 end += 1;
186 }
187 range.first = init;
188 range.second = end;
189 init = end;
190 }
191 }
192
193 std::string GetLabel() final { return "LazyDS"; }
194
195 std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
196 GetColumnReaders(unsigned int slot, std::string_view colName, const std::type_info &tid) final
197 {
198 auto colTypeIt = fColTypesMap.find(std::string(colName));
199 if (colTypeIt == fColTypesMap.end()) {
200 throw std::runtime_error("The specified column name, \"" + std::string(colName) +
201 "\" is not known to the data source.");
202 }
203
204 const auto typeName = ROOT::Internal::RDF::TypeID2TypeName(tid);
205 if (colTypeIt->second != typeName) {
206 throw std::runtime_error("Column " + std::string(colName) + " has type " + colTypeIt->second +
207 " while the id specified is associated to type " + typeName);
208 }
209
210 if (auto colNameIt = std::find(fColNames.begin(), fColNames.end(), colName); colNameIt != fColNames.end()) {
211 const auto index = std::distance(fColNames.begin(), colNameIt);
212 return std::make_unique<ROOT::Internal::RDF::RLazyDSColumnReader>(fPointerHolders[index][slot]);
213 }
214
215 throw std::runtime_error("Could not find column name \"" + std::string(colName) +
216 "\" in available column names.");
217 }
218};
219
220} // ns RDF
221
222} // ns ROOT
223
224#endif
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
unsigned long long ULong64_t
Portable unsigned long integer 8 bytes.
Definition RtypesCore.h:84
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
RLazyDSColumnReader(ROOT::Internal::RDF::TPointerHolder *ptr)
ROOT::Internal::RDF::TPointerHolder * fPtr
Mother class of TTypedPointerHolder.
Class to wrap a pointer and delete the memory associated to it correctly.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
A RDataSource implementation which is built on top of result proxies.
RLazyDS(RLazyDS &&)=delete
const std::vector< std::string > fColNames
std::vector< PointerHolderPtrs_t > fPointerHolders
RLazyDS(const RLazyDS &)=delete
std::vector< ROOT::Internal::RDF::TPointerHolder * > PointerHolderPtrs_t
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e.
std::string GetLabel() final
Return a string representation of the datasource type.
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
RLazyDS & operator=(RLazyDS &&)=delete
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence< S... >)
const PointerHolderPtrs_t fPointerHoldersModels
const std::map< std::string, std::string > fColTypesMap
void ColLengthChecker(std::index_sequence< S... >)
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
std::string AsString() final
void Initialize() final
Convenience method called before starting an event-loop.
RLazyDS & operator=(const RLazyDS &)=delete
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
Record_t GetColumnReadersImpl(std::string_view, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
size_t GetEntriesNumber()
RLazyDS(std::pair< std::string, RResultPtr< std::vector< ColumnTypes > > >... colsNameVals)
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
std::tuple< RResultPtr< std::vector< ColumnTypes > >... > fColumns
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int slot, std::string_view colName, const std::type_info &tid) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
Smart pointer for the return type of actions.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:178
Namespace for new ROOT classes and functions.
TSeq< int > TSeqI
Definition TSeq.hxx:203