Logo ROOT   6.18/05
Reference Guide
RArrowDS.cxx
Go to the documentation of this file.
1// Author: Giulio Eulisse CERN 2/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11// clang-format off
12/** \class ROOT::RDF::RArrowDS
13 \ingroup dataframe
14 \brief RDataFrame data source class to interface with Apache Arrow.
15
16The RArrowDS implements a proxy RDataSource to be able to use Apache Arrow
17tables with RDataFrame.
18
An RDataFrame that adapts an arrow::Table can be constructed using the factory method
ROOT::RDF::MakeArrowDataFrame, which accepts two parameters:
1. An arrow::Table smart pointer.
2. The names of the columns to use; if the list is empty, all the columns found in the table are used.
22
23The types of the columns are derived from the types in the associated
24arrow::Schema.
25
26*/
27// clang-format on
28
29#include <ROOT/RDF/Utils.hxx>
30#include <ROOT/TSeq.hxx>
31#include <ROOT/RArrowDS.hxx>
32#include <ROOT/RMakeUnique.hxx>
33
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
37
38#if defined(__GNUC__)
39#pragma GCC diagnostic push
40#pragma GCC diagnostic ignored "-Wshadow"
41#pragma GCC diagnostic ignored "-Wunused-parameter"
42#endif
43#include <arrow/table.h>
44#include <arrow/stl.h>
45#if defined(__GNUC__)
46#pragma GCC diagnostic pop
47#endif
48
49namespace ROOT {
50namespace Internal {
51namespace RDF {
52
/// Maps a C++ value type onto the matching Arrow data type.
///
/// Arrow 0.12.0 dropped the
///
///    using ArrowType = ArrowType_;
///
/// alias from its own ARROW_STL_CONVERSION helper, so the mapping is kept here.
/// The primary template is deliberately empty: only the specialisations
/// generated through ROOT_ARROW_STL_CONVERSION provide an ArrowType member.
template <class CType>
struct RootConversionTraits {
};
60
61#define ROOT_ARROW_STL_CONVERSION(c_type, ArrowType_) \
62 template <> \
63 struct RootConversionTraits<c_type> { \
64 using ArrowType = ::arrow::ArrowType_; \
65 };
66
67ROOT_ARROW_STL_CONVERSION(bool, BooleanType)
68ROOT_ARROW_STL_CONVERSION(int8_t, Int8Type)
69ROOT_ARROW_STL_CONVERSION(int16_t, Int16Type)
70ROOT_ARROW_STL_CONVERSION(int32_t, Int32Type)
72ROOT_ARROW_STL_CONVERSION(uint8_t, UInt8Type)
73ROOT_ARROW_STL_CONVERSION(uint16_t, UInt16Type)
74ROOT_ARROW_STL_CONVERSION(uint32_t, UInt32Type)
76ROOT_ARROW_STL_CONVERSION(float, FloatType)
77ROOT_ARROW_STL_CONVERSION(double, DoubleType)
78ROOT_ARROW_STL_CONVERSION(std::string, StringType)
79
80// Per slot visitor of an Array.
81class ArrayPtrVisitor : public ::arrow::ArrayVisitor {
82private:
83 /// The pointer to update.
84 void **fResult;
85 bool fCachedBool{false}; // Booleans need to be unpacked, so we use a cached entry.
86 // FIXME: I should really use a variant here
87 RVec<float> fCachedRVecFloat;
88 RVec<double> fCachedRVecDouble;
89 RVec<ULong64_t> fCachedRVecULong64;
90 RVec<UInt_t> fCachedRVecUInt;
91 RVec<Long64_t> fCachedRVecLong64;
92 RVec<Int_t> fCachedRVecInt;
93 std::string fCachedString;
94 /// The entry in the array which should be looked up.
95 ULong64_t fCurrentEntry;
96
97 template <typename T>
98 void *getTypeErasedPtrFrom(arrow::ListArray const &array, int32_t entry, RVec<T> &cache)
99 {
100 using ArrowType = typename RootConversionTraits<T>::ArrowType;
101 using ArrayType = typename arrow::TypeTraits<ArrowType>::ArrayType;
102 auto values = reinterpret_cast<ArrayType *>(array.values().get());
103 auto offset = array.value_offset(entry);
104 // Here the cast to void* is a worksround while we figure out the
105 // issues we have with long long types, signed and unsigned.
106 RVec<T> tmp(reinterpret_cast<T *>((void *)values->raw_values()) + offset, array.value_length(entry));
107 std::swap(cache, tmp);
108 return (void *)(&cache);
109 }
110
111public:
112 ArrayPtrVisitor(void **result) : fResult{result}, fCurrentEntry{0} {}
113
114 void SetEntry(ULong64_t entry) { fCurrentEntry = entry; }
115
116 /// Check if we are asking the same entry as before.
117 virtual arrow::Status Visit(arrow::Int32Array const &array) final
118 {
119 *fResult = (void *)(array.raw_values() + fCurrentEntry);
120 return arrow::Status::OK();
121 }
122
123 virtual arrow::Status Visit(arrow::Int64Array const &array) final
124 {
125 *fResult = (void *)(array.raw_values() + fCurrentEntry);
126 return arrow::Status::OK();
127 }
128
129 /// Check if we are asking the same entry as before.
130 virtual arrow::Status Visit(arrow::UInt32Array const &array) final
131 {
132 *fResult = (void *)(array.raw_values() + fCurrentEntry);
133 return arrow::Status::OK();
134 }
135
136 virtual arrow::Status Visit(arrow::UInt64Array const &array) final
137 {
138 *fResult = (void *)(array.raw_values() + fCurrentEntry);
139 return arrow::Status::OK();
140 }
141
142 virtual arrow::Status Visit(arrow::FloatArray const &array) final
143 {
144 *fResult = (void *)(array.raw_values() + fCurrentEntry);
145 return arrow::Status::OK();
146 }
147
148 virtual arrow::Status Visit(arrow::DoubleArray const &array) final
149 {
150 *fResult = (void *)(array.raw_values() + fCurrentEntry);
151 return arrow::Status::OK();
152 }
153
154 virtual arrow::Status Visit(arrow::BooleanArray const &array) final
155 {
156 fCachedBool = array.Value(fCurrentEntry);
157 *fResult = reinterpret_cast<void *>(&fCachedBool);
158 return arrow::Status::OK();
159 }
160
161 virtual arrow::Status Visit(arrow::StringArray const &array) final
162 {
163 fCachedString = array.GetString(fCurrentEntry);
164 *fResult = reinterpret_cast<void *>(&fCachedString);
165 return arrow::Status::OK();
166 }
167
168 virtual arrow::Status Visit(arrow::ListArray const &array) final
169 {
170 switch (array.value_type()->id()) {
171 case arrow::Type::FLOAT: {
172 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecFloat);
173 return arrow::Status::OK();
174 }
175 case arrow::Type::DOUBLE: {
176 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecDouble);
177 return arrow::Status::OK();
178 }
179 case arrow::Type::UINT32: {
180 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecUInt);
181 return arrow::Status::OK();
182 }
183 case arrow::Type::UINT64: {
184 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecULong64);
185 return arrow::Status::OK();
186 }
187 case arrow::Type::INT32: {
188 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecInt);
189 return arrow::Status::OK();
190 }
191 case arrow::Type::INT64: {
192 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecLong64);
193 return arrow::Status::OK();
194 }
195 default: return arrow::Status::TypeError("Type not supported");
196 }
197 }
198
199 using ::arrow::ArrayVisitor::Visit;
200};
201
202/// Helper class which keeps track for each slot where to get the entry.
203class TValueGetter {
204private:
205 std::vector<void *> fValuesPtrPerSlot;
206 std::vector<ULong64_t> fLastEntryPerSlot;
207 std::vector<ULong64_t> fLastChunkPerSlot;
208 std::vector<ULong64_t> fFirstEntryPerChunk;
209 std::vector<ArrayPtrVisitor> fArrayVisitorPerSlot;
210 /// Since data can be chunked in different arrays we need to construct an
211 /// index which contains the first element of each chunk, so that we can
212 /// quickly move to the correct chunk.
213 std::vector<ULong64_t> fChunkIndex;
214 arrow::ArrayVector fChunks;
215
216public:
217 TValueGetter(size_t slots, arrow::ArrayVector chunks)
218 : fValuesPtrPerSlot(slots, nullptr), fLastEntryPerSlot(slots, 0), fLastChunkPerSlot(slots, 0), fChunks{chunks}
219 {
220 fChunkIndex.reserve(fChunks.size());
221 size_t next = 0;
222 for (auto &chunk : chunks) {
223 fFirstEntryPerChunk.push_back(next);
224 next += chunk->length();
225 fChunkIndex.push_back(next);
226 }
227 for (size_t si = 0, se = fValuesPtrPerSlot.size(); si != se; ++si) {
228 fArrayVisitorPerSlot.push_back(ArrayPtrVisitor{fValuesPtrPerSlot.data() + si});
229 }
230 }
231
232 /// This returns the ptr to the ptr to actual data.
233 std::vector<void *> SlotPtrs()
234 {
235 std::vector<void *> result;
236 for (size_t i = 0; i < fValuesPtrPerSlot.size(); ++i) {
237 result.push_back(fValuesPtrPerSlot.data() + i);
238 }
239 return result;
240 }
241
242 // Convenience method to avoid code duplication between
243 // SetEntry and InitSlot
244 void UncachedSlotLookup(unsigned int slot, ULong64_t entry)
245 {
246 // If entry is greater than the previous one,
247 // we can skip all the chunks before the last one we
248 // queried.
249 size_t ci = 0;
250 assert(slot < fLastChunkPerSlot.size());
251 if (fLastEntryPerSlot[slot] < entry) {
252 ci = fLastChunkPerSlot.at(slot);
253 }
254
255 for (size_t ce = fChunkIndex.size(); ci != ce; ++ci) {
256 if (entry < fChunkIndex[ci]) {
257 assert(slot < fLastChunkPerSlot.size());
258 fLastChunkPerSlot[slot] = ci;
259 break;
260 }
261 }
262
263 // Update the pointer to the requested entry.
264 // Notice that we need to find the entry
265 auto chunk = fChunks.at(fLastChunkPerSlot[slot]);
266 assert(slot < fArrayVisitorPerSlot.size());
267 fArrayVisitorPerSlot[slot].SetEntry(entry - fFirstEntryPerChunk[fLastChunkPerSlot[slot]]);
268 fLastEntryPerSlot[slot] = entry;
269 auto status = chunk->Accept(fArrayVisitorPerSlot.data() + slot);
270 if (!status.ok()) {
271 std::string msg = "Could not get pointer for slot ";
272 msg += std::to_string(slot) + " looking at entry " + std::to_string(entry);
273 throw std::runtime_error(msg);
274 }
275 }
276
277 /// Set the current entry to be retrieved
278 void SetEntry(unsigned int slot, ULong64_t entry)
279 {
280 // Same entry as before
281 if (fLastEntryPerSlot[slot] == entry) {
282 return;
283 }
284 UncachedSlotLookup(slot, entry);
285 }
286};
287
288} // namespace RDF
289} // namespace Internal
290
291namespace RDF {
292
293/// Helper to get the contents of a given column
294
295/// Helper to get the human readable name of type
296class RDFTypeNameGetter : public ::arrow::TypeVisitor {
297private:
298 std::vector<std::string> fTypeName;
299
300public:
301 arrow::Status Visit(const arrow::Int64Type &) override
302 {
303 fTypeName.push_back("Long64_t");
304 return arrow::Status::OK();
305 }
306 arrow::Status Visit(const arrow::Int32Type &) override
307 {
308 fTypeName.push_back("Int_t");
309 return arrow::Status::OK();
310 }
311 arrow::Status Visit(const arrow::UInt64Type &) override
312 {
313 fTypeName.push_back("ULong64_t");
314 return arrow::Status::OK();
315 }
316 arrow::Status Visit(const arrow::UInt32Type &) override
317 {
318 fTypeName.push_back("UInt_t");
319 return arrow::Status::OK();
320 }
321 arrow::Status Visit(const arrow::FloatType &) override
322 {
323 fTypeName.push_back("float");
324 return arrow::Status::OK();
325 }
326 arrow::Status Visit(const arrow::DoubleType &) override
327 {
328 fTypeName.push_back("double");
329 return arrow::Status::OK();
330 }
331 arrow::Status Visit(const arrow::StringType &) override
332 {
333 fTypeName.push_back("string");
334 return arrow::Status::OK();
335 }
336 arrow::Status Visit(const arrow::BooleanType &) override
337 {
338 fTypeName.push_back("bool");
339 return arrow::Status::OK();
340 }
341 arrow::Status Visit(const arrow::ListType &l) override
342 {
343 /// Recursively visit List types and map them to
344 /// an RVec. We accumulate the result of the recursion on
345 /// fTypeName so that we can create the actual type
346 /// when the recursion is done.
347 fTypeName.push_back("ROOT::VecOps::RVec<%s>");
348 return l.value_type()->Accept(this);
349 }
350 std::string result()
351 {
352 // This recursively builds a nested type.
353 std::string result = "%s";
354 char buffer[8192];
355 for (size_t i = 0; i < fTypeName.size(); ++i) {
356 snprintf(buffer, 8192, result.c_str(), fTypeName[i].c_str());
357 result = buffer;
358 }
359 return result;
360 }
361
362 using ::arrow::TypeVisitor::Visit;
363};
364
365/// Helper to determine if a given Column is a supported type.
366class VerifyValidColumnType : public ::arrow::TypeVisitor {
367private:
368public:
369 virtual arrow::Status Visit(const arrow::Int64Type &) override { return arrow::Status::OK(); }
370 virtual arrow::Status Visit(const arrow::UInt64Type &) override { return arrow::Status::OK(); }
371 virtual arrow::Status Visit(const arrow::Int32Type &) override { return arrow::Status::OK(); }
372 virtual arrow::Status Visit(const arrow::UInt32Type &) override { return arrow::Status::OK(); }
373 virtual arrow::Status Visit(const arrow::FloatType &) override { return arrow::Status::OK(); }
374 virtual arrow::Status Visit(const arrow::DoubleType &) override { return arrow::Status::OK(); }
375 virtual arrow::Status Visit(const arrow::StringType &) override { return arrow::Status::OK(); }
376 virtual arrow::Status Visit(const arrow::BooleanType &) override { return arrow::Status::OK(); }
377 virtual arrow::Status Visit(const arrow::ListType &) override { return arrow::Status::OK(); }
378
379 using ::arrow::TypeVisitor::Visit;
380};
381
382////////////////////////////////////////////////////////////////////////
383/// Constructor to create an Arrow RDataSource for RDataFrame.
384/// \param[in] table the arrow Table to observe.
385/// \param[in] columns the name of the columns to use
386/// In case columns is empty, we use all the columns found in the table
387RArrowDS::RArrowDS(std::shared_ptr<arrow::Table> inTable, std::vector<std::string> const &inColumns)
388 : fTable{inTable}, fColumnNames{inColumns}
389{
390 auto &columnNames = fColumnNames;
391 auto &table = fTable;
392 auto &index = fGetterIndex;
393 // We want to allow people to specify which columns they
394 // need so that we can think of upfront IO optimizations.
395 auto filterWantedColumns = [&columnNames, &table]() {
396 if (columnNames.empty()) {
397 for (auto &field : table->schema()->fields()) {
398 columnNames.push_back(field->name());
399 }
400 }
401 };
402
403 // To support both arrow 0.14.0 and 0.16.0
404 using ColumnType = decltype(fTable->column(0));
405
406 auto getRecordsFirstColumn = [&columnNames, &table]() {
407 if (columnNames.empty()) {
408 throw std::runtime_error("At least one column required");
409 }
410 const auto name = columnNames.front();
411 const auto columnIdx = table->schema()->GetFieldIndex(name);
412 return table->column(columnIdx)->length();
413 };
414
415 // All columns are supposed to have the same number of entries.
416 auto verifyColumnSize = [&table](ColumnType column, int columnIdx, int nRecords) {
417 if (column->length() != nRecords) {
418 std::string msg = "Column ";
419 msg += table->schema()->field(columnIdx)->name() + " has a different number of entries.";
420 throw std::runtime_error(msg);
421 }
422 };
423
424 /// For the moment we support only a few native types.
425 auto verifyColumnType = [&table](ColumnType column, int columnIdx) {
426 auto verifyType = std::make_unique<VerifyValidColumnType>();
427 auto result = column->type()->Accept(verifyType.get());
428 if (result.ok() == false) {
429 std::string msg = "Column ";
430 msg += table->schema()->field(columnIdx)->name() + " contains an unsupported type.";
431 throw std::runtime_error(msg);
432 }
433 };
434
435 /// This is used to create an index between the columnId
436 /// and the associated getter.
437 auto addColumnToGetterIndex = [&index](int columnId) { index.push_back(std::make_pair(columnId, index.size())); };
438
439 /// Assuming we can get called more than once, we need to
440 /// reset the getter index each time.
441 auto resetGetterIndex = [&index]() { index.clear(); };
442
443 /// This is what initialization actually does
444 filterWantedColumns();
445 resetGetterIndex();
446 auto nRecords = getRecordsFirstColumn();
447 for (auto &columnName : fColumnNames) {
448 auto columnIdx = fTable->schema()->GetFieldIndex(columnName);
449 addColumnToGetterIndex(columnIdx);
450
451 auto column = fTable->column(columnIdx);
452 verifyColumnSize(column, columnIdx, nRecords);
453 verifyColumnType(column, columnIdx);
454 }
455}
456
457////////////////////////////////////////////////////////////////////////
458/// Destructor.
460{
461}
462
463const std::vector<std::string> &RArrowDS::GetColumnNames() const
464{
465 return fColumnNames;
466}
467
468std::vector<std::pair<ULong64_t, ULong64_t>> RArrowDS::GetEntryRanges()
469{
470 auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
471 return entryRanges;
472}
473
474std::string RArrowDS::GetTypeName(std::string_view colName) const
475{
476 auto field = fTable->schema()->GetFieldByName(std::string(colName));
477 if (!field) {
478 std::string msg = "The dataset does not have column ";
479 msg += colName;
480 throw std::runtime_error(msg);
481 }
482 RDFTypeNameGetter typeGetter;
483 auto status = field->type()->Accept(&typeGetter);
484 if (status.ok() == false) {
485 std::string msg = "RArrowDS does not support a column of type ";
486 msg += field->type()->name();
487 throw std::runtime_error(msg);
488 }
489 return typeGetter.result();
490}
491
493{
494 auto field = fTable->schema()->GetFieldByName(std::string(colName));
495 if (!field) {
496 return false;
497 }
498 return true;
499}
500
501bool RArrowDS::SetEntry(unsigned int slot, ULong64_t entry)
502{
503 for (auto link : fGetterIndex) {
504 auto &getter = fValueGetters[link.second];
505 getter->SetEntry(slot, entry);
506 }
507 return true;
508}
509
510void RArrowDS::InitSlot(unsigned int slot, ULong64_t entry)
511{
512 for (auto link : fGetterIndex) {
513 auto &getter = fValueGetters[link.second];
514 getter->UncachedSlotLookup(slot, entry);
515 }
516}
517
518void splitInEqualRanges(std::vector<std::pair<ULong64_t, ULong64_t>> &ranges, int nRecords, unsigned int nSlots)
519{
520 ranges.clear();
521 const auto chunkSize = nRecords / nSlots;
522 const auto remainder = 1U == nSlots ? 0 : nRecords % nSlots;
523 auto start = 0UL;
524 auto end = 0UL;
525 for (auto i : ROOT::TSeqU(nSlots)) {
526 start = end;
527 end += chunkSize;
528 ranges.emplace_back(start, end);
529 (void)i;
530 }
531 ranges.back().second += remainder;
532}
533
534int getNRecords(std::shared_ptr<arrow::Table> &table, std::vector<std::string> &columnNames)
535{
536 auto index = table->schema()->GetFieldIndex(columnNames.front());
537 return table->column(index)->length();
538};
539
540template <typename T>
541std::shared_ptr<arrow::ChunkedArray> getData(T p)
542{
543 return p->data();
544}
545
546template <>
547std::shared_ptr<arrow::ChunkedArray>
548getData<std::shared_ptr<arrow::ChunkedArray>>(std::shared_ptr<arrow::ChunkedArray> p)
549{
550 return p;
551}
552
553void RArrowDS::SetNSlots(unsigned int nSlots)
554{
555 assert(0U == fNSlots && "Setting the number of slots even if the number of slots is different from zero.");
556 fNSlots = nSlots;
557 // We dump all the previous getters structures and we rebuild it.
558 auto nColumns = fGetterIndex.size();
559
560 fValueGetters.clear();
561 for (size_t ci = 0; ci != nColumns; ++ci) {
562 auto chunkedArray = getData(fTable->column(fGetterIndex[ci].first));
563 fValueGetters.emplace_back(std::make_unique<ROOT::Internal::RDF::TValueGetter>(nSlots, chunkedArray->chunks()));
564 }
565}
566
567/// This needs to return a pointer to the pointer each value getter
568/// will point to.
569std::vector<void *> RArrowDS::GetColumnReadersImpl(std::string_view colName, const std::type_info &)
570{
571 auto &index = fGetterIndex;
572 auto findGetterIndex = [&index](unsigned int column) {
573 for (auto &entry : index) {
574 if (entry.first == column) {
575 return entry.second;
576 }
577 }
578 throw std::runtime_error("No column found at index " + std::to_string(column));
579 };
580
581 const int columnIdx = fTable->schema()->GetFieldIndex(std::string(colName));
582 const int getterIdx = findGetterIndex(columnIdx);
583 assert(getterIdx != -1);
584 assert((unsigned int)getterIdx < fValueGetters.size());
585 return fValueGetters[getterIdx]->SlotPtrs();
586}
587
589{
590 auto nRecords = getNRecords(fTable, fColumnNames);
592}
593
595{
596 return "ArrowDS";
597}
598
599/// Creates a RDataFrame using an arrow::Table as input.
600/// \param[in] table the arrow Table to observe.
601/// \param[in] columnNames the name of the columns to use
602/// In case columnNames is empty, we use all the columns found in the table
603RDataFrame MakeArrowDataFrame(std::shared_ptr<arrow::Table> table, std::vector<std::string> const &columnNames)
604{
605 ROOT::RDataFrame tdf(std::make_unique<RArrowDS>(table, columnNames));
606 return tdf;
607}
608
609} // namespace RDF
610
611} // namespace ROOT
#define ROOT_ARROW_STL_CONVERSION(c_type, ArrowType_)
Definition: RArrowDS.cxx:61
long long Long64_t
Definition: RtypesCore.h:69
unsigned long long ULong64_t
Definition: RtypesCore.h:70
char name[80]
Definition: TGX11.cxx:109
typedef void((*Func_t)())
#define snprintf
Definition: civetweb.c:1540
bool HasColumn(std::string_view colName) const override
Checks if the dataset has a certain column.
Definition: RArrowDS.cxx:492
RArrowDS(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columns)
Constructor to create an Arrow RDataSource for RDataFrame.
Definition: RArrowDS.cxx:387
void Initialise() override
Convenience method called before starting an event-loop.
Definition: RArrowDS.cxx:588
std::string GetLabel() override
Return a string representation of the datasource type.
Definition: RArrowDS.cxx:594
void SetNSlots(unsigned int nSlots) override
Inform RDataSource of the number of processing slots (i.e.
Definition: RArrowDS.cxx:553
~RArrowDS()
Destructor.
Definition: RArrowDS.cxx:459
const std::vector< std::string > & GetColumnNames() const override
Returns a reference to the collection of the dataset's column names.
Definition: RArrowDS.cxx:463
void InitSlot(unsigned int slot, ULong64_t firstEntry) override
Convenience method called at the start of the data processing associated to a slot.
Definition: RArrowDS.cxx:510
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() override
Return ranges of entries to distribute to tasks.
Definition: RArrowDS.cxx:468
std::shared_ptr< arrow::Table > fTable
Definition: RArrowDS.hxx:24
std::vector< std::pair< size_t, size_t > > fGetterIndex
Definition: RArrowDS.hxx:29
std::vector< std::unique_ptr< ROOT::Internal::RDF::TValueGetter > > fValueGetters
Definition: RArrowDS.hxx:30
std::vector< void * > GetColumnReadersImpl(std::string_view name, const std::type_info &type) override
This needs to return a pointer to the pointer each value getter will point to.
Definition: RArrowDS.cxx:569
std::vector< std::string > fColumnNames
Definition: RArrowDS.hxx:26
std::string GetTypeName(std::string_view colName) const override
Type of a column as a string, e.g.
Definition: RArrowDS.cxx:474
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition: RArrowDS.hxx:25
bool SetEntry(unsigned int slot, ULong64_t entry) override
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition: RArrowDS.cxx:501
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:42
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
basic_string_view< char > string_view
void swap(RDirectoryEntry &e1, RDirectoryEntry &e2) noexcept
double T(double x)
Definition: ChebyshevPol.h:34
void splitInEqualRanges(std::vector< std::pair< ULong64_t, ULong64_t > > &ranges, int nRecords, unsigned int nSlots)
Definition: RArrowDS.cxx:518
int getNRecords(std::shared_ptr< arrow::Table > &table, std::vector< std::string > &columnNames)
Definition: RArrowDS.cxx:534
std::shared_ptr< arrow::ChunkedArray > getData(T p)
Definition: RArrowDS.cxx:541
RDataFrame MakeArrowDataFrame(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columns)
Factory method to create a Apache Arrow RDataFrame.
Definition: RArrowDS.cxx:603
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
auto * l
Definition: textangle.C:4