Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RRootDS.cxx
Go to the documentation of this file.
1#include <ROOT/RDF/Utils.hxx>
2#include <ROOT/RRootDS.hxx>
3#include <ROOT/TSeq.hxx>
4#include <TClass.h>
5#include <TError.h>
6#include <TROOT.h> // For the gROOTMutex
7#include <TVirtualMutex.h> // For the R__LOCKGUARD
9
10#include <algorithm>
11#include <vector>
12
13namespace ROOT {
14
15namespace Internal {
16
17namespace RDF {
18
19std::vector<void *> RRootDS::GetColumnReadersImpl(std::string_view name, const std::type_info &id)
20{
21 const auto colTypeName = GetTypeName(name);
22 const auto &colTypeId = ROOT::Internal::RDF::TypeName2TypeID(colTypeName);
23 if (id != colTypeId) {
24 std::string err = "The type of column \"";
25 err += name;
26 err += "\" is ";
27 err += colTypeName;
28 err += " but a different one has been selected.";
29 throw std::runtime_error(err);
30 }
31
32 const auto index =
33 std::distance(fListOfBranches.begin(), std::find(fListOfBranches.begin(), fListOfBranches.end(), name));
34 std::vector<void *> ret(fNSlots);
35 for (auto slot : ROOT::TSeqU(fNSlots)) {
36 ret[slot] = (void *)&fBranchAddresses[index][slot];
37 }
38 return ret;
39}
40
41RRootDS::RRootDS(std::string_view treeName, std::string_view fileNameGlob)
42 : fTreeName(treeName), fFileNameGlob(fileNameGlob), fModelChain(std::string(treeName).c_str())
43{
45
47 fListOfBranches.resize(lob.GetEntries());
48
49 TIterCategory<TObjArray> iter(&lob);
50 std::transform(iter.Begin(), iter.End(), fListOfBranches.begin(), [](TObject *o) { return o->GetName(); });
51}
52
54{
55 for (auto addr : fAddressesToFree) {
56 delete addr;
57 }
58}
59
60std::string RRootDS::GetTypeName(std::string_view colName) const
61{
62 if (!HasColumn(colName)) {
63 std::string e = "The dataset does not have column ";
64 e += colName;
65 throw std::runtime_error(e);
66 }
67 // TODO: we need to factor out the routine for the branch alone...
68 // Maybe a cache for the names?
69 auto typeName = ROOT::Internal::RDF::ColumnName2ColumnTypeName(std::string(colName), &fModelChain, /*ds=*/nullptr,
70 /*define=*/nullptr);
71 // We may not have yet loaded the library where the dictionary of this type is
72 TClass::GetClass(typeName.c_str());
73 return typeName;
74}
75
76const std::vector<std::string> &RRootDS::GetColumnNames() const
77{
78 return fListOfBranches;
79}
80
81bool RRootDS::HasColumn(std::string_view colName) const
82{
83 if (!fListOfBranches.empty())
85 return fListOfBranches.end() != std::find(fListOfBranches.begin(), fListOfBranches.end(), colName);
86}
87
88void RRootDS::InitSlot(unsigned int slot, ULong64_t firstEntry)
89{
90 auto chain = new TChain(fTreeName.c_str());
91 chain->ResetBit(kMustCleanup);
92 chain->Add(fFileNameGlob.c_str());
93 chain->GetEntry(firstEntry);
94 TString setBranches;
95 for (auto i : ROOT::TSeqU(fListOfBranches.size())) {
96 auto colName = fListOfBranches[i].c_str();
97 auto &addr = fBranchAddresses[i][slot];
98 auto typeName = GetTypeName(colName);
99 auto typeClass = TClass::GetClass(typeName.c_str());
100 if (typeClass) {
101 chain->SetBranchAddress(colName, &addr, nullptr, typeClass, EDataType(0), true);
102 } else {
103 if (!addr) {
104 addr = new double();
105 fAddressesToFree.emplace_back((double *)addr);
106 }
107 chain->SetBranchAddress(colName, addr);
108 }
109 }
110 fChains[slot].reset(chain);
111}
112
113void RRootDS::FinaliseSlot(unsigned int slot)
114{
115 fChains[slot].reset(nullptr);
116}
117
118std::vector<std::pair<ULong64_t, ULong64_t>> RRootDS::GetEntryRanges()
119{
120 auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
121 return entryRanges;
122}
123
124bool RRootDS::SetEntry(unsigned int slot, ULong64_t entry)
125{
126 fChains[slot]->GetEntry(entry);
127 return true;
128}
129
130void RRootDS::SetNSlots(unsigned int nSlots)
131{
132 R__ASSERT(0U == fNSlots && "Setting the number of slots even if the number of slots is different from zero.");
133
134 fNSlots = nSlots;
135
136 const auto nColumns = fListOfBranches.size();
137 // Initialise the entire set of addresses
138 fBranchAddresses.resize(nColumns, std::vector<void *>(fNSlots, nullptr));
139
140 fChains.resize(fNSlots);
141}
142
144{
145 const auto nentries = fModelChain.GetEntries();
146 const auto chunkSize = nentries / fNSlots;
147 const auto reminder = 1U == fNSlots ? 0 : nentries % fNSlots;
148 auto start = 0UL;
149 auto end = 0UL;
150 for (auto i : ROOT::TSeqU(fNSlots)) {
151 start = end;
152 end += chunkSize;
153 fEntryRanges.emplace_back(start, end);
154 (void)i;
155 }
156 fEntryRanges.back().second += reminder;
157}
158
159std::string RRootDS::GetLabel()
160{
161 return "Root";
162}
163
164RDataFrame MakeRootDataFrame(std::string_view treeName, std::string_view fileNameGlob)
165{
166 return ROOT::RDataFrame(treeName, fileNameGlob);
167}
168
169} // ns RDF
170
171} // ns Internal
172
173} // ns ROOT
double
#define e(i)
Definition RSha256.hxx:103
unsigned long long ULong64_t
Definition RtypesCore.h:74
EDataType
Definition TDataType.h:28
#define R__ASSERT(e)
Definition TError.h:120
char name[80]
Definition TGX11.cxx:110
int nentries
@ kMustCleanup
Definition TObject.h:355
typedef void((*Func_t)())
void FinaliseSlot(unsigned int slot)
Convenience method called at the end of the data processing associated to a slot.
Definition RRootDS.cxx:113
void Initialise()
Convenience method called before starting an event-loop.
Definition RRootDS.cxx:143
bool HasColumn(std::string_view colName) const
Checks if the dataset has a certain column.
Definition RRootDS.cxx:81
const std::vector< std::string > & GetColumnNames() const
Returns a reference to the collection of the dataset's column names.
Definition RRootDS.cxx:76
std::string GetLabel()
Return a string representation of the datasource type.
Definition RRootDS.cxx:159
std::string GetTypeName(std::string_view colName) const
Type of a column as a string, e.g. GetTypeName("x") == "double".
Definition RRootDS.cxx:60
RRootDS(std::string_view treeName, std::string_view fileNameGlob)
Definition RRootDS.cxx:41
std::vector< std::vector< void * > > fBranchAddresses
Definition RRootDS.hxx:35
std::vector< std::unique_ptr< TChain > > fChains
Definition RRootDS.hxx:36
std::vector< void * > GetColumnReadersImpl(std::string_view, const std::type_info &)
type-erased vector of pointers to pointers to column values - one per slot
Definition RRootDS.cxx:19
void SetNSlots(unsigned int nSlots)
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
Definition RRootDS.cxx:130
std::vector< std::string > fListOfBranches
Definition RRootDS.hxx:33
std::vector< double * > fAddressesToFree
Definition RRootDS.hxx:32
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges()
Return ranges of entries to distribute to tasks.
Definition RRootDS.cxx:118
void InitSlot(unsigned int slot, ULong64_t firstEntry)
Convenience method called at the start of the data processing associated to a slot.
Definition RRootDS.cxx:88
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition RRootDS.hxx:34
bool SetEntry(unsigned int slot, ULong64_t entry)
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition RRootDS.cxx:124
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
A chain is a collection of files containing TTree objects.
Definition TChain.h:33
virtual TObjArray * GetListOfBranches()
Return a pointer to the list of branches of the current tree.
Definition TChain.cxx:1094
virtual Long64_t GetEntries() const
Return the total number of entries in the chain.
Definition TChain.cxx:947
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition TChain.cxx:229
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2957
TIterCategory & Begin()
static TIterCategory End()
An array of TObjects.
Definition TObjArray.h:37
Int_t GetEntries() const
Return the number of objects in array (i.e. the number of non-empty slots).
Mother of all ROOT objects.
Definition TObject.h:37
Basic string class.
Definition TString.h:136
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition RDFUtils.cxx:51
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, RDataSource *ds, RDefineBase *define, bool vector2rvec)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:223
RDataFrame MakeRootDataFrame(std::string_view treeName, std::string_view fileNameGlob)
Definition RRootDS.cxx:164
tbb::task_arena is an alias of tbb::interface7::task_arena, which does not allow forward-declaring tbb::task_arena without also forward-declaring tbb::interface7.
TSeq< unsigned int > TSeqU
Definition TSeq.hxx:195