Logo ROOT   6.16/01
Reference Guide
RRootDS.cxx
Go to the documentation of this file.
1#include <ROOT/RDF/Utils.hxx>
2#include <ROOT/RRootDS.hxx>
3#include <ROOT/TSeq.hxx>
4#include <TClass.h>
5#include <TError.h>
6#include <TROOT.h> // For the gROOTMutex
7#include <TVirtualMutex.h> // For the R__LOCKGUARD
9
10#include <algorithm>
11#include <vector>
12
13namespace ROOT {
14
15namespace RDF {
16
17std::vector<void *> RRootDS::GetColumnReadersImpl(std::string_view name, const std::type_info &id)
18{
19 const auto colTypeName = GetTypeName(name);
20 const auto &colTypeId = ROOT::Internal::RDF::TypeName2TypeID(colTypeName);
21 if (id != colTypeId) {
22 std::string err = "The type of column \"";
23 err += name;
24 err += "\" is ";
25 err += colTypeName;
26 err += " but a different one has been selected.";
27 throw std::runtime_error(err);
28 }
29
30 const auto index =
31 std::distance(fListOfBranches.begin(), std::find(fListOfBranches.begin(), fListOfBranches.end(), name));
32 std::vector<void *> ret(fNSlots);
33 for (auto slot : ROOT::TSeqU(fNSlots)) {
34 ret[slot] = (void *)&fBranchAddresses[index][slot];
35 }
36 return ret;
37}
38
40 : fTreeName(treeName), fFileNameGlob(fileNameGlob), fModelChain(std::string(treeName).c_str())
41{
43
45 fListOfBranches.resize(lob.GetEntries());
46
47 TIterCategory<TObjArray> iter(&lob);
48 std::transform(iter.Begin(), iter.End(), fListOfBranches.begin(), [](TObject *o) { return o->GetName(); });
49}
50
52{
53 for (auto addr : fAddressesToFree) {
54 delete addr;
55 }
56}
57
58std::string RRootDS::GetTypeName(std::string_view colName) const
59{
60 if (!HasColumn(colName)) {
61 std::string e = "The dataset does not have column ";
62 e += colName;
63 throw std::runtime_error(e);
64 }
65 // TODO: we need to factor out the routine for the branch alone...
66 // Maybe a cache for the names?
67 auto typeName =
68 ROOT::Internal::RDF::ColumnName2ColumnTypeName(std::string(colName), /*nsID=*/0, &fModelChain, /*ds=*/nullptr,
69 /*isCustomCol=*/false);
70 // We may not have yet loaded the library where the dictionary of this type is
71 TClass::GetClass(typeName.c_str());
72 return typeName;
73}
74
75const std::vector<std::string> &RRootDS::GetColumnNames() const
76{
77 return fListOfBranches;
78}
79
81{
82 if (!fListOfBranches.empty())
84 return fListOfBranches.end() != std::find(fListOfBranches.begin(), fListOfBranches.end(), colName);
85}
86
87void RRootDS::InitSlot(unsigned int slot, ULong64_t firstEntry)
88{
89 auto chain = new TChain(fTreeName.c_str());
90 chain->ResetBit(kMustCleanup);
91 chain->Add(fFileNameGlob.c_str());
92 chain->GetEntry(firstEntry);
93 TString setBranches;
94 for (auto i : ROOT::TSeqU(fListOfBranches.size())) {
95 auto colName = fListOfBranches[i].c_str();
96 auto &addr = fBranchAddresses[i][slot];
97 auto typeName = GetTypeName(colName);
98 auto typeClass = TClass::GetClass(typeName.c_str());
99 if (typeClass) {
100 chain->SetBranchAddress(colName, &addr, nullptr, typeClass, EDataType(0), true);
101 } else {
102 if (!addr) {
103 addr = new double();
104 fAddressesToFree.emplace_back((double *)addr);
105 }
106 chain->SetBranchAddress(colName, addr);
107 }
108 }
109 fChains[slot].reset(chain);
110}
111
112void RRootDS::FinaliseSlot(unsigned int slot)
113{
114 fChains[slot].reset(nullptr);
115}
116
117std::vector<std::pair<ULong64_t, ULong64_t>> RRootDS::GetEntryRanges()
118{
119 auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
120 return entryRanges;
121}
122
123bool RRootDS::SetEntry(unsigned int slot, ULong64_t entry)
124{
125 fChains[slot]->GetEntry(entry);
126 return true;
127}
128
129void RRootDS::SetNSlots(unsigned int nSlots)
130{
131 R__ASSERT(0U == fNSlots && "Setting the number of slots even if the number of slots is different from zero.");
132
133 fNSlots = nSlots;
134
135 const auto nColumns = fListOfBranches.size();
136 // Initialise the entire set of addresses
137 fBranchAddresses.resize(nColumns, std::vector<void *>(fNSlots, nullptr));
138
139 fChains.resize(fNSlots);
140}
141
143{
144 const auto nentries = fModelChain.GetEntries();
145 const auto chunkSize = nentries / fNSlots;
146 const auto reminder = 1U == fNSlots ? 0 : nentries % fNSlots;
147 auto start = 0UL;
148 auto end = 0UL;
149 for (auto i : ROOT::TSeqU(fNSlots)) {
150 start = end;
151 end += chunkSize;
152 fEntryRanges.emplace_back(start, end);
153 (void)i;
154 }
155 fEntryRanges.back().second += reminder;
156}
157
158std::string RRootDS::GetLabel()
159{
160 return "Root";
161}
162
164{
165 ROOT::RDataFrame tdf(std::make_unique<RRootDS>(treeName, fileNameGlob));
166 return tdf;
167}
168
169} // ns RDF
170
171} // ns ROOT
#define e(i)
Definition: RSha256.hxx:103
unsigned long long ULong64_t
Definition: RtypesCore.h:70
EDataType
Definition: TDataType.h:28
#define R__ASSERT(e)
Definition: TError.h:96
int nentries
Definition: THbookFile.cxx:89
@ kMustCleanup
Definition: TObject.h:343
typedef void((*Func_t)())
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges()
Return ranges of entries to distribute to tasks.
Definition: RRootDS.cxx:117
std::vector< std::unique_ptr< TChain > > fChains
Definition: RRootDS.hxx:34
const std::vector< std::string > & GetColumnNames() const
Returns a reference to the collection of the dataset's column names.
Definition: RRootDS.cxx:75
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition: RRootDS.hxx:32
std::string fFileNameGlob
Definition: RRootDS.hxx:28
bool HasColumn(std::string_view colName) const
Checks if the dataset has a certain column.
Definition: RRootDS.cxx:80
unsigned int fNSlots
Definition: RRootDS.hxx:26
std::vector< void * > GetColumnReadersImpl(std::string_view, const std::type_info &)
type-erased vector of pointers to pointers to column values - one per slot
Definition: RRootDS.cxx:17
RRootDS(std::string_view treeName, std::string_view fileNameGlob)
Definition: RRootDS.cxx:39
std::vector< std::vector< void * > > fBranchAddresses
Definition: RRootDS.hxx:33
bool SetEntry(unsigned int slot, ULong64_t entry)
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition: RRootDS.cxx:123
void Initialise()
Convenience method called before starting an event-loop.
Definition: RRootDS.cxx:142
void SetNSlots(unsigned int nSlots)
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
Definition: RRootDS.cxx:129
std::string fTreeName
Definition: RRootDS.hxx:27
void FinaliseSlot(unsigned int slot)
Convenience method called at the end of the data processing associated to a slot.
Definition: RRootDS.cxx:112
std::vector< std::string > fListOfBranches
Definition: RRootDS.hxx:31
TChain fModelChain
Definition: RRootDS.hxx:29
std::string GetLabel()
Return a string representation of the datasource type.
Definition: RRootDS.cxx:158
std::vector< double * > fAddressesToFree
Definition: RRootDS.hxx:30
void InitSlot(unsigned int slot, ULong64_t firstEntry)
Convenience method called at the start of the data processing associated to a slot.
Definition: RRootDS.cxx:87
std::string GetTypeName(std::string_view colName) const
Type of a column as a string, e.g. GetTypeName("x") == "double".
Definition: RRootDS.cxx:58
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:41
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
virtual TObjArray * GetListOfBranches()
Return a pointer to the list of branches of the current tree.
Definition: TChain.cxx:1084
virtual Long64_t GetEntries() const
Return the total number of entries in the chain.
Definition: TChain.cxx:937
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition: TChain.cxx:222
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2885
TIterCategory & Begin()
Definition: TCollection.h:278
static TIterCategory End()
Definition: TCollection.h:279
An array of TObjects.
Definition: TObjArray.h:37
Int_t GetEntries() const
Return the number of objects in array (i.e. number of non-empty slots).
Definition: TObjArray.cxx:522
Mother of all ROOT objects.
Definition: TObject.h:37
Basic string class.
Definition: TString.h:131
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition: RDFUtils.cxx:40
std::string ColumnName2ColumnTypeName(const std::string &colName, unsigned int namespaceID, TTree *tree, RDataSource *ds, bool isCustomColumn, bool vector2tvec, unsigned int customColID)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:186
RDataFrame MakeRootDataFrame(std::string_view treeName, std::string_view fileNameGlob)
Definition: RRootDS.cxx:163
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
STL namespace.
basic_string_view< char > string_view
Definition: RStringView.hxx:35