54namespace Experimental {
66 std::unique_ptr<ROOT::RFieldBase>
CloneImpl(std::string_view )
const final
68 return std::make_unique<RRDFCardinalityField>();
103 *
static_cast<std::size_t *
>(to) =
size;
112 *
static_cast<std::size_t *
>(to) =
size;
126 std::unique_ptr<ROOT::RFieldBase>
CloneImpl(std::string_view)
const final
166 std::unique_ptr<RFieldBase::RValue>
fValue;
201 auto onDiskType =
source.GetSharedDescriptorGuard()->GetFieldDescriptor(
fField->GetOnDiskId()).GetTypeName();
202 std::string
msg =
"RNTupleDS: invalid type \"" +
fField->GetTypeName() +
"\" for column \"" +
205 throw std::runtime_error(
msg);
214 fValue = std::make_unique<RFieldBase::RValue>(
fField->CreateValue());
234 return fValue->GetPtr<
void>().get();
290 fieldDesc.GetTypeName().substr(0, 12) ==
"std::vector<" ||
fieldDesc.GetTypeName() ==
"");
320 f.SetOnDiskId(desc.
FindFieldId(
f.GetFieldName(),
f.GetParent()->GetOnDiskId()));
326 if (
info.fNRepetitions > 0) {
327 cardinalityField = std::make_unique<ROOT::Experimental::Internal::RArraySizeField>(
info.fNRepetitions);
329 cardinalityField = std::make_unique<ROOT::Experimental::Internal::RRDFCardinalityField>();
388 std::vector<ROOT::Experimental::RNTupleDS::RFieldInfo>());
399 static std::once_flag
flag;
400 std::call_once(
flag, []() {
411std::unique_ptr<ROOT::Experimental::Internal::RPageSource>
412CreatePageSource(std::string_view
ntupleName, std::string_view fileName)
442std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
457 return fld->GetTypeName() == requestedType;
464 throw std::runtime_error(
"RNTupleDS: Could not create field with type \"" +
requestedType +
465 "\" for column \"" + std::string(
name));
479 for (
const auto &s : *
field) {
483 auto reader = std::make_unique<Internal::RNTupleColumnReader>(
this,
field);
539 if (!
range.fSource) {
544 range.fSource->Attach();
562 std::unique_ptr<Internal::RPageSource>
source;
596 unsigned int iSlot = 0;
611 range.fSource->SetEntryRange({start, end - start});
612 range.fFirstEntry = start;
613 range.fLastEntry = end;
621 std::vector<std::pair<ULong64_t, ULong64_t>> ranges;
631 r->Disconnect(
true );
684 ranges.emplace_back(start, end);
714 r->Disconnect(
true );
723 auto msg = std::string(
"RNTupleDS: There is no column with name \"") + std::string(
colName) +
"\"";
724 throw std::runtime_error(
msg);
760 for (
unsigned int i = 0; i <
fNSlots; ++i) {
762 r->Disconnect(
false );
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
R__EXTERN TSystem * gSystem
An artificial field that provides the size of a fixed-size array.
~RArraySizeField() final=default
RArraySizeField(std::size_t arrayLength)
std::unique_ptr< ROOT::RFieldBase > CloneImpl(std::string_view) const final
Called by Clone(), which additionally copies the on-disk ID.
RArraySizeField & operator=(RArraySizeField &&other)=default
void ConstructValue(void *where) const final
Constructs value in a given location of size at least GetValueSize(). Called by the base class' Creat...
void ReadInClusterImpl(RNTupleLocalIndex, void *to) final
void GenerateColumns(const ROOT::RNTupleDescriptor &) final
Implementations in derived classes should create the backing columns corresponding to the field type ...
void ReadGlobalImpl(ROOT::NTupleSize_t, void *to) final
RArraySizeField(RArraySizeField &&other)=default
std::size_t GetValueSize() const final
The number of bytes taken by a value of the appropriate type.
RArraySizeField(const RArraySizeField &other)=delete
std::size_t GetAlignment() const final
As a rule of thumb, the alignment is equal to the size of the type.
void GenerateColumns() final
Implementations in derived classes should create the backing columns corresponding to the field type ...
RArraySizeField & operator=(const RArraySizeField &other)=delete
Every RDF column is represented by exactly one RNTuple field.
std::unique_ptr< RFieldBase > fField
The field backing the RDF column.
Long64_t fEntryOffset
For chains, the logical entry and the physical entry in any particular file can be different.
std::unique_ptr< RFieldBase::RValue > fValue
The memory location used to read from fField.
RNTupleColumnReader(RNTupleDS *ds, RFieldBase *protoField)
Long64_t fLastEntry
Last entry number that was read.
RNTupleDS * fDataSource
The data source that owns this column reader.
void Connect(RPageSource &source, Long64_t entryOffset)
Connect the field and its subfields to the page source.
void Disconnect(bool keepValue)
void * GetImpl(Long64_t entry) final
std::shared_ptr< void > fValuePtr
Used to reuse the object created by fValue when reconnecting sources.
RFieldBase * fProtoField
The prototype field from which fField is cloned.
~RNTupleColumnReader()=default
Abstract interface to read data from an ntuple.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
An artificial field that transforms an RNTuple column that contains the offset of collections into co...
void ReadInClusterImpl(ROOT::RNTupleLocalIndex localIndex, void *to) final
Get the number of elements of the collection identified by clusterIndex.
void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final
Get the number of elements of the collection identified by globalIndex.
RRDFCardinalityField(RRDFCardinalityField &&other)=default
void GenerateColumns(const ROOT::RNTupleDescriptor &desc) final
Implementations in derived classes should create the backing columns corresponding to the field type ...
std::unique_ptr< ROOT::RFieldBase > CloneImpl(std::string_view) const final
Called by Clone(), which additionally copies the on-disk ID.
const RColumnRepresentations & GetColumnRepresentations() const final
Implementations in derived classes should return a static RColumnRepresentations object.
void GenerateColumns() final
Implementations in derived classes should create the backing columns corresponding to the field type ...
RRDFCardinalityField & operator=(RRDFCardinalityField &&other)=default
size_t GetValueSize() const final
The number of bytes taken by a value of the appropriate type.
size_t GetAlignment() const final
As a rule of thumb, the alignment is equal to the size of the type.
void ConstructValue(void *where) const final
Constructs value in a given location of size at least GetValueSize(). Called by the base class' Creat...
~RRDFCardinalityField()=default
The RDataSource implementation for RNTuple.
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e.
void AddField(const ROOT::RNTupleDescriptor &desc, std::string_view colName, ROOT::DescriptorId_t fieldId, std::vector< RFieldInfo > fieldInfos, bool convertToRVec=true)
Provides the RDF column "colName" given the field identified by fieldID.
void ExecStaging()
The main function of the fThreadStaging background thread.
ROOT::RNTupleDescriptor fPrincipalDescriptor
A clone of the first pages source's descriptor.
std::vector< std::unique_ptr< ROOT::RFieldBase > > fProtoFields
We prepare a prototype field for every column.
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
std::thread fThreadStaging
The background thread that runs StageNextSources()
void Initialize() final
Convenience method called before starting an event-loop.
std::size_t fNextFileIndex
Index into fFileNames to the next file to process.
std::vector< std::string > fColumnNames
std::unordered_map< ULong64_t, std::size_t > fFirstEntry2RangeIdx
Maps the first entries from the ranges of the last GetEntryRanges() call to their corresponding index...
void Finalize() final
Convenience method called after concluding an event-loop.
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
void InitSlot(unsigned int slot, ULong64_t firstEntry) final
Convenience method called at the start of the data processing associated to a slot.
void FinalizeSlot(unsigned int slot) final
Convenience method called at the end of the data processing associated to a slot.
std::vector< std::vector< Internal::RNTupleColumnReader * > > fActiveColumnReaders
List of column readers returned by GetColumnReaders() organized by slot.
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
std::vector< REntryRangeDS > fCurrentRanges
Basis for the ranges returned by the last GetEntryRanges() call.
bool fStagingThreadShouldTerminate
Is true when the I/O thread should quit.
bool fHasNextSources
Is true when the staging thread has populated the next batch of files to fStagingArea.
std::vector< REntryRangeDS > fNextRanges
Basis for the ranges populated by the PrepareNextRanges() call.
std::mutex fMutexStaging
Protects the shared state between the main thread and the I/O thread.
void StageNextSources()
Starting from fNextFileIndex, opens the next fNSlots files.
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
std::vector< std::unique_ptr< ROOT::Experimental::Internal::RPageSource > > fStagingArea
The staging area is relevant for chains of files, i.e.
void PrepareNextRanges()
Populates fNextRanges with the next set of entry ranges.
std::unordered_map< std::size_t, std::vector< std::unique_ptr< ROOT::RFieldBase > > > fAlternativeProtoFields
Columns may be requested with types other than with which they were initially added as proto fields.
std::vector< std::string > fFileNames
std::string fNTupleName
The data source may be constructed with an ntuple name and a list of files.
std::vector< std::string > fColumnTypes
RNTupleDS(std::unique_ptr< ROOT::Experimental::Internal::RPageSource > pageSource)
std::unordered_map< ROOT::DescriptorId_t, std::string > fFieldId2QualifiedName
Connects the IDs of active proto fields and their subfields to their fully qualified name (a....
ULong64_t fSeenEntries
The number of entries so far returned by GetEntryRanges()
std::condition_variable fCvStaging
Signal for the state information of fIsReadyForStaging and fHasNextSources.
bool fIsReadyForStaging
Is true when the staging thread should start working.
void GetCollectionInfo(const ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *collectionStart, ROOT::NTupleSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
static void SetClusterBunchSize(RNTupleReadOptions &options, unsigned int val)
std::vector< void * > Record_t
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Base class for all ROOT issued exceptions.
The list of column representations a field can have.
A field translates read and write calls from/to underlying columns to/from tree values.
ROOT::Internal::RColumn * fPrincipalColumn
All fields that have columns have a distinct main column.
RConstSchemaIterator cbegin() const
const std::string & GetFieldName() const
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
ROOT::DescriptorId_t GetOnDiskId() const
std::unique_ptr< RFieldBase > Clone(std::string_view newName) const
Copies the field and its subfields using a possibly new name and a new, unconnected set of columns.
The on-storage metadata of an RNTuple.
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Common user-tunable settings for reading RNTuples.
Representation of an RNTuple data set in a ROOT file.
const_iterator begin() const
const_iterator end() const
virtual const char * Getenv(const char *env)
Get environment variable.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
void CallConnectPageSourceOnField(RFieldBase &, ROOT::Experimental::Internal::RPageSource &)
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert std::prefix.
RDataFrame FromRNTuple(std::string_view ntupleName, std::string_view fileName)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t DescriptorId_t
Distriniguishes elements of the same type within a descriptor, e.g. different fields.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
The PrepareNextRanges() method populates the fNextRanges list with REntryRangeDS records.