Logo ROOT  
Reference Guide
RNTupleDS.hxx
Go to the documentation of this file.
1/// \file RNTupleDS.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Enrico Guiraud <enrico.guiraud@cern.ch>
5/// \date 2018-10-04
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#ifndef ROOT_RNTupleDS
18#define ROOT_RNTupleDS
19
20#include <ROOT/RDataFrame.hxx>
21#include <ROOT/RDataSource.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RStringView.hxx>
24
25#include <cstdint>
26#include <memory>
27#include <string>
28#include <vector>
29
30namespace ROOT {
31namespace Experimental {
32
33class RNTuple;
34class RNTupleDescriptor;
35
36namespace Detail {
37class RFieldBase;
38class RFieldValue;
39class RPageSource;
40} // namespace Detail
41
42namespace Internal {
43class RNTupleColumnReader;
44}
45
/// The RDataSource implementation for RNTuple.
///
/// Exposes the fields of an RNTuple as RDataFrame columns. The `final` member functions below override the
/// RDataSource interface; their one-line summaries repeat the contract documented on the base class.
46class RNTupleDS final : public ROOT::RDF::RDataSource {
47 /// Clones of the first source, one for each slot
48 std::vector<std::unique_ptr<ROOT::Experimental::Detail::RPageSource>> fSources;
49
50 /// We prepare a column reader prototype for every column. If a column reader is actually requested
51 /// in GetColumnReaders(), we move a clone of the prototype into the hands of RDataFrame.
52 /// Only the clone connects to the backing page store and acquires I/O resources.
53 std::vector<std::unique_ptr<ROOT::Experimental::Internal::RNTupleColumnReader>> fColumnReaderPrototypes;
 /// Names of the RDF columns; returned by reference from GetColumnNames().
54 std::vector<std::string> fColumnNames;
 /// NOTE(review): presumably the type name of each column, parallel to fColumnNames -- confirm in RNTupleDS.cxx.
55 std::vector<std::string> fColumnTypes;
 /// NOTE(review): presumably indices of the columns for which a reader has been requested -- confirm in RNTupleDS.cxx.
56 std::vector<size_t> fActiveColumns;
57
 /// Number of processing slots; zero until set (NOTE(review): presumably assigned in SetNSlots() -- confirm).
58 unsigned fNSlots = 0;
 /// NOTE(review): presumably flags that GetEntryRanges() has already handed out every range -- confirm in RNTupleDS.cxx.
59 bool fHasSeenAllRanges = false;
60
61 /// Provides the RDF column "colName" given the field identified by fieldId. For records and collections,
62 /// AddField recurses into the subfields. skeinIDs is the list of field IDs of the outer collections
63 /// of fieldId. For instance, if fieldId refers to an `std::vector<Jet>`, with
64 /// struct Jet {
65 /// float pt;
66 /// float eta;
67 /// };
68 /// AddField will recurse into Jet.pt and Jet.eta and provide the two inner fields as std::vector<float> each.
69 void AddField(const RNTupleDescriptor &desc,
70 std::string_view colName,
71 DescriptorId_t fieldId,
72 std::vector<DescriptorId_t> skeinIDs);
73
74public:
 /// Constructs the data source from a page source; ownership of `pageSource` is transferred (passed as unique_ptr).
75 explicit RNTupleDS(std::unique_ptr<ROOT::Experimental::Detail::RPageSource> pageSource);
 /// Inform RDataSource of the number of processing slots.
77 void SetNSlots(unsigned int nSlots) final;
 /// Returns a reference to the collection of the dataset's column names.
78 const std::vector<std::string> &GetColumnNames() const final { return fColumnNames; }
 /// Checks if the dataset has a certain column.
79 bool HasColumn(std::string_view colName) const final;
 /// Type of a column as a string.
80 std::string GetTypeName(std::string_view colName) const final;
 /// Return ranges of entries to distribute to tasks.
81 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final;
 /// Return a string representation of the datasource type; always "RNTupleDS".
82 std::string GetLabel() final { return "RNTupleDS"; }
83
 /// Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
84 bool SetEntry(unsigned int slot, ULong64_t entry) final;
85
 /// Convenience method called before starting an event-loop.
86 void Initialize() final;
 /// Convenience method called after concluding an event-loop.
87 void Finalize() final;
88
 /// Per the RDataSource contract, this overload is called when the other GetColumnReaders overload returns
 /// an empty vector. Returns an owning pointer to a column reader.
89 std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
90 GetColumnReaders(unsigned int /*slot*/, std::string_view /*name*/, const std::type_info &) final;
91
92protected:
 /// Returns a type-erased vector of pointers to pointers to column values, one per slot.
93 Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final;
94};
95
/// Factory function returning an RDataFrame; implemented in RNTupleDS.cxx.
/// NOTE(review): presumably the frame is backed by an RNTupleDS reading the ntuple `ntupleName` from the file
/// `fileName` -- confirm against the implementation.
96RDataFrame MakeNTupleDataFrame(std::string_view ntupleName, std::string_view fileName);
98
99} // ns Experimental
100} // ns ROOT
101
102#endif
unsigned long long ULong64_t
Definition: RtypesCore.h:81
char name[80]
Definition: TGX11.cxx:110
A field translates read and write calls from/to underlying columns to/from tree values.
Represents transient storage of simple or complex C++ values.
The RDataSource implementation for RNTuple.
Definition: RNTupleDS.hxx:46
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
Definition: RNTupleDS.cxx:288
void AddField(const RNTupleDescriptor &desc, std::string_view colName, DescriptorId_t fieldId, std::vector< DescriptorId_t > skeinIDs)
Provides the RDF column "colName" given the field identified by fieldID.
Definition: RNTupleDS.cxx:161
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
Definition: RNTupleDS.hxx:78
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
Definition: RNTupleDS.cxx:344
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition: RNTupleDS.cxx:298
std::vector< std::unique_ptr< ROOT::Experimental::Internal::RNTupleColumnReader > > fColumnReaderPrototypes
We prepare a column reader prototype for every column.
Definition: RNTupleDS.hxx:53
std::vector< std::unique_ptr< ROOT::Experimental::Detail::RPageSource > > fSources
Clones of the first source, one for each slot.
Definition: RNTupleDS.hxx:48
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
Type-erased vector of pointers to pointers to column values, one per slot.
Definition: RNTupleDS.cxx:281
void Initialize() final
Convenience method called before starting an event-loop.
Definition: RNTupleDS.cxx:337
std::vector< std::string > fColumnNames
Definition: RNTupleDS.hxx:54
void Finalize() final
Convenience method called after concluding an event-loop.
Definition: RNTupleDS.cxx:342
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g. `GetTypeName("x") == "double"`; required for jitting.
Definition: RNTupleDS.cxx:326
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
Definition: RNTupleDS.cxx:303
RNTupleDS(std::unique_ptr< ROOT::Experimental::Detail::RPageSource > pageSource)
Definition: RNTupleDS.cxx:272
std::vector< size_t > fActiveColumns
Definition: RNTupleDS.hxx:56
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
Definition: RNTupleDS.cxx:332
std::vector< std::string > fColumnTypes
Definition: RNTupleDS.hxx:55
std::string GetLabel() final
Return a string representation of the datasource type.
Definition: RNTupleDS.hxx:82
The on-storage meta-data of an ntuple.
Representation of an RNTuple data set in a ROOT file.
Definition: RNTuple.hxx:491
Pure virtual base class for all column reader types.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, CSV and other data formats, in C++ or Python.
Definition: RDataFrame.hxx:40
basic_string_view< char > string_view
RDataFrame MakeNTupleDataFrame(std::string_view ntupleName, std::string_view fileName)
Definition: RNTupleDS.cxx:359
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:83
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.