29 switch (entryStatus) {
33 default:
return false;
// Classify a column type name as one of the recognised collection spellings ("RVec<", "array<",
// "vector<") and extract the text of its template argument list.
// NOTE(review): this listing is missing lines (braces and most return statements); the comments below
// describe only the statements that are visible here.
37std::tuple<bool, std::string, ROOT::Internal::RDF::RTreeUntypedArrayColumnReader::ECollectionType>
38GetCollectionInfo(
const std::string &typeName)
// Keep everything up to and including the first '<'. When typeName contains no '<', find_first_of
// yields npos, npos + 1 wraps around to 0 and beginType is empty, so none of the searches below match.
40 const auto beginType = typeName.substr(0, typeName.find_first_of(
'<') + 1);
// "RVec<" spelling: the inner type is the text between the '<' following the match and the last '>'.
43 if (
auto pos = beginType.find(
"RVec<"); pos != std::string::npos) {
44 const auto begin = typeName.find_first_of(
'<', pos) + 1;
45 const auto end = typeName.find_last_of(
'>');
46 const auto innerTypeName = typeName.substr(begin, end - begin);
// An inner type whose name contains "bool" is singled out (the statements it selects are not visible).
47 if (innerTypeName.find(
"bool") != std::string::npos)
// "array<" spelling: the template arguments are cut at their last comma and only the part before it
// is returned as the inner type name.
54 if (
auto pos = beginType.find(
"array<"); pos != std::string::npos) {
55 const auto begin = typeName.find_first_of(
'<', pos) + 1;
56 const auto end = typeName.find_last_of(
'>');
57 const auto arrTemplArgs = typeName.substr(begin, end - begin);
58 const auto lastComma = arrTemplArgs.find_last_of(
',');
59 return {
true, arrTemplArgs.substr(0, lastComma),
// "vector<" spelling: the inner type is extracted exactly as in the "RVec<" case.
64 if (
auto pos = beginType.find(
"vector<"); pos != std::string::npos) {
65 const auto begin = typeName.find_first_of(
'<', pos) + 1;
66 const auto end = typeName.find_last_of(
'>');
67 const auto innerTypeName = typeName.substr(begin, end - begin);
74bool ContainsLeaf(
const std::set<TLeaf *> &leaves,
TLeaf *leaf)
76 return (leaves.find(leaf) != leaves.end());
/// Register a branch name, qualified by the friend tree name when one is given.
///
/// \param[in,out] bNamesReg Set of the names registered so far; used to reject repeated names.
/// \param[in,out] bNames Registered names, in order of first insertion.
/// \param[in] branchName Name of the branch to register.
/// \param[in] friendName Name under which the owning friend tree is known; empty when the branch
///            does not belong to a friend.
/// \param[in] allowDuplicates When true, a friend's branch is also registered under its bare name.
void InsertBranchName(std::set<std::string> &bNamesReg, std::vector<std::string> &bNames, const std::string &branchName,
                      const std::string &friendName, bool allowDuplicates)
{
   if (!friendName.empty()) {
      // A branch of a friend is registered as "<friendName>.<branchName>".
      const auto friendBName = friendName + "." + branchName;
      if (bNamesReg.insert(friendBName).second)
         bNames.push_back(friendBName);
   }

   // The bare name is always registered when there is no friend name, and for friends only on request.
   // std::set::insert reports through `.second` whether the name was new, which keeps bNames unique.
   if (allowDuplicates || friendName.empty()) {
      if (bNamesReg.insert(branchName).second)
         bNames.push_back(branchName);
   }
}
// Overload of InsertBranchName that also keeps track of the leaves already used to register a name.
// NOTE(review): the lines between the computation of canAdd and the calls below are missing from this
// listing; presumably they return early when canAdd is false - confirm against the complete source.
100void InsertBranchName(std::set<std::string> &bNamesReg, std::vector<std::string> &bNames,
const std::string &branchName,
101 const std::string &friendName, std::set<TLeaf *> &foundLeaves,
TLeaf *leaf,
bool allowDuplicates)
// The name may be added when duplicates are allowed or when the leaf is not yet in foundLeaves.
103 const bool canAdd = allowDuplicates ? true : !ContainsLeaf(foundLeaves, leaf);
// Delegate the registration of the name to the overload that takes no leaf arguments.
108 InsertBranchName(bNamesReg, bNames, branchName, friendName, allowDuplicates);
// Record the leaf so that later calls can detect it through ContainsLeaf.
110 foundLeaves.insert(leaf);
// Walk the sub-branches of branch b, calling itself recursively, and register their names through
// InsertBranchName. `prefix` is the text prepended to each sub-branch name to form its full name.
// NOTE(review): several lines of this function (braces, some conditions, the definition of subBranch)
// are missing from this listing; the comments describe only the visible statements.
113void ExploreBranch(
TTree &t, std::set<std::string> &bNamesReg, std::vector<std::string> &bNames,
TBranch *
b,
114 std::string prefix, std::string &friendName,
bool allowDuplicates)
// When the prefix ends with '.', start the search for the last "real" dot just before that trailing
// dot; otherwise (npos) the whole prefix is searched.
122 auto prefixEndingDot = std::string::npos;
123 if (!prefix.empty() && prefix.back() ==
'.')
124 prefixEndingDot = prefix.size() - 2;
// lastPrefixToken is the piece of the prefix that follows its last real dot; it stays empty when the
// prefix contains no such dot.
125 std::string lastPrefixToken{};
126 if (
auto prefixLastRealDot = prefix.find_last_of(
'.', prefixEndingDot); prefixLastRealDot != std::string::npos)
127 lastPrefixToken = prefix.substr(prefixLastRealDot + 1, prefixEndingDot - prefixLastRealDot);
// NOTE(review): subBranch, used below, is defined on a line missing from this listing; presumably it
// is sb converted to TBranch * - confirm.
129 for (
auto sb : *
b->GetListOfBranches()) {
131 auto subBranchName = std::string(subBranch->
GetName());
132 auto fullName = prefix + subBranchName;
// If the sub-branch name begins with the same token the prefix ends with, drop the repeated token
// (and its dot) when building the full name.
134 if (
auto subNameFirstDot = subBranchName.find_first_of(
'.'); subNameFirstDot != std::string::npos) {
136 if (!lastPrefixToken.empty() && lastPrefixToken == subBranchName.substr(0, subNameFirstDot))
137 fullName = prefix + subBranchName.substr(subNameFirstDot + 1);
// Recurse into the sub-branch; the line(s) deciding when newPrefix is assigned are not visible.
140 std::string newPrefix;
142 newPrefix = fullName +
".";
144 ExploreBranch(t, bNamesReg, bNames, subBranch, newPrefix, friendName, allowDuplicates);
// Look the full name up in the tree, first with GetBranch and then with FindBranch, and register the
// full name reported by the branch that was found.
146 auto branchDirectlyFromTree = t.
GetBranch(fullName.c_str());
147 if (!branchDirectlyFromTree)
148 branchDirectlyFromTree = t.
FindBranch(fullName.c_str());
149 if (branchDirectlyFromTree)
150 InsertBranchName(bNamesReg, bNames, std::string(branchDirectlyFromTree->GetFullName()), friendName,
// Also register the bare sub-branch name when it is not registered yet and the tree resolves it.
153 if (bNamesReg.find(subBranchName) == bNamesReg.end() && t.
GetBranch(subBranchName.c_str()))
154 InsertBranchName(bNamesReg, bNames, subBranchName, friendName, allowDuplicates);
// Collect the branch names of tree t into bNamesReg/bNames and then do the same, recursively, for its
// friend trees. analysedTrees records the trees already handled.
// NOTE(review): a large part of this function (braces, conditions, local definitions such as branches,
// branchName, listOfLeaves, be, friendTrees, alias) is missing from this listing; the comments describe
// only the visible statements.
158void GetBranchNamesImpl(
TTree &t, std::set<std::string> &bNamesReg, std::vector<std::string> &bNames,
159 std::set<TTree *> &analysedTrees, std::string &friendName,
bool allowDuplicates)
// Leaves already used to register a name; handed to the leaf-aware InsertBranchName overload.
161 std::set<TLeaf *> foundLeaves;
// insert(...).second is false when t is already in analysedTrees; the body of this branch is not
// visible (presumably it returns - confirm).
162 if (!analysedTrees.insert(&t).second) {
// Error raised with the message below; the condition that triggers it is not visible.
171 std::string
err(
"GetBranchNames: error in opening the tree ");
173 throw std::runtime_error(
err);
176 for (
auto b : *branches) {
// A branch with exactly one leaf is registered under the branch name, keyed on that leaf.
182 if (listOfLeaves->GetEntriesUnsafe() == 1) {
183 auto leaf =
static_cast<TLeaf *
>(listOfLeaves->UncheckedAt(0));
184 InsertBranchName(bNamesReg, bNames, branchName, friendName, foundLeaves, leaf, allowDuplicates);
// Otherwise every leaf is registered as "<branchName>.<leafName>".
187 for (
auto leaf : *listOfLeaves) {
188 auto castLeaf =
static_cast<TLeaf *
>(leaf);
189 const auto leafName = std::string(leaf->
GetName());
190 const auto fullName =
branchName +
"." + leafName;
191 InsertBranchName(bNamesReg, bNames, fullName, friendName, foundLeaves, castLeaf, allowDuplicates);
// Explore the sub-branches using "<branchName>." as prefix, then register the branch name itself.
195 ExploreBranch(t, bNamesReg, bNames, branch, branchName +
".", friendName, allowDuplicates);
196 InsertBranchName(bNamesReg, bNames, branchName, friendName, allowDuplicates);
201 bool dotIsImplied =
false;
204 throw std::runtime_error(
"GetBranchNames: unsupported branch type");
// NOTE(review): 3 and 4 are branch type codes whose meaning is not shown in this listing - confirm.
206 if (be->GetType() == 3 || be->GetType() == 4)
// Sub-branches are explored either with an empty prefix or with "<branchName>." as prefix; the
// condition choosing between the two calls is not visible (presumably it involves dotIsImplied).
210 ExploreBranch(t, bNamesReg, bNames, branch,
"", friendName, allowDuplicates);
212 ExploreBranch(t, bNamesReg, bNames, branch, branchName +
".", friendName, allowDuplicates);
214 InsertBranchName(bNamesReg, bNames, branchName, friendName, allowDuplicates);
// Friend trees: the friend name is the alias when one is set, the tree's own name being presumably the
// fallback (the line between the two assignments is missing). The recursion shares the registries
// and analysedTrees with the caller.
227 for (
auto friendTreeObj : *friendTrees) {
232 if (alias !=
nullptr)
233 frName = std::string(alias);
235 frName = std::string(friendTree->GetName());
237 GetBranchNamesImpl(*friendTree, bNamesReg, bNames, analysedTrees, frName, allowDuplicates);
// Entry point of the branch-name collection: sets up empty registries and an empty friend name, then
// lets GetBranchNamesImpl fill them for tree t. allowDuplicates (true by default) is forwarded as is.
// NOTE(review): the statement returning the result is not part of this listing; presumably bNames is
// returned - confirm against the complete source.
243std::vector<std::string> RetrieveDatasetSchema(
TTree &t,
bool allowDuplicates =
true)
// bNamesSet rejects repeated names, bNames keeps the accepted names in insertion order.
245 std::set<std::string> bNamesSet;
246 std::vector<std::string> bNames;
247 std::set<TTree *> analysedTrees;
// The tree passed by the caller is processed with an empty friend name.
248 std::string emptyFrName =
"";
249 GetBranchNamesImpl(t, bNamesSet, bNames, analysedTrees, emptyFrName, allowDuplicates);
268 for (std::size_t i = 0ul; i <
fFriends.size(); i++) {
269 const auto &thisFriendAlias = friendInfo->
fFriendNames[i].second;
270 fTree->AddFriend(
fFriends[i].get(), thisFriendAlias.c_str());
284 assert(tree &&
"No tree passed to the constructor of RTTreeDS!");
285 Setup(std::move(tree));
290 assert(tree &&
"No tree passed to the constructor of RTTreeDS!");
291 Setup(std::move(tree), &friendInfo);
297 throw std::runtime_error(
"RDataFrame: invalid TDirectory when constructing the data source.");
299 const std::string treeNameInt(treeName);
300 auto tree = dirPtr->
Get<
TTree>(treeName.data());
302 throw std::runtime_error(
"RDataFrame: TTree dataset '" + std::string(treeName) +
"' cannot be found in '" +
313 fNoCleanupNotifier = std::make_unique<ROOT::Internal::TreeUtils::RNoCleanupNotifier>();
320 std::string treeNameInt{treeName};
321 std::string fileNameGlobInt{fileNameGlob};
323 chain->Add(fileNameGlobInt.c_str());
325 Setup(std::move(chain));
332 fNoCleanupNotifier = std::make_unique<ROOT::Internal::TreeUtils::RNoCleanupNotifier>();
339 std::string treeNameInt(treeName);
341 for (
auto &&
f : fileNameGlobs)
342 chain->Add(
f.c_str());
344 Setup(std::move(chain));
352 fNoCleanupNotifier = std::make_unique<ROOT::Internal::TreeUtils::RNoCleanupNotifier>();
359 return ROOT::RDataFrame(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(treeName, fileNameGlob));
365 return ROOT::RDataFrame(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(treeName, fileNameGlobs));
369 unsigned int,
const std::unordered_map<std::string, ROOT::RDF::Experimental::RSample *> &sampleMap)
const
378 auto *file = tree->GetCurrentFile();
379 const std::string fname = file !=
nullptr ? file->GetName() :
"#inmemorytree#";
381 std::pair<Long64_t, Long64_t> range =
fTreeReader->GetEntriesRange();
383 if (range.second == -1) {
384 range.second = tree->GetEntries();
387 const std::string &
id = fname + (treename.rfind(
'/', 0) == 0 ?
"" :
"/") + treename;
388 if (sampleMap.empty()) {
389 return RSampleInfo(
id, range,
nullptr, tree->GetEntries());
391 if (sampleMap.find(
id) == sampleMap.end())
392 throw std::runtime_error(
"Full sample identifier '" +
id +
"' cannot be found in the available samples.");
393 return RSampleInfo(
id, range, sampleMap.at(
id), tree->GetEntries());
401 std::atomic<ULong64_t> entryCount(0ull);
407 suppressErrorsForMissingBranches)
408 : std::make_unique<ROOT::TTreeProcessorMT>(*
fTree, entryList,
fNSlots, suppressErrorsForMissingBranches)};
410 tp->Process([&lm, &slotStack, &entryCount](
TTreeReader &treeReader) {
416 auto &&processedEntries = entryCount.load();
417 if ((end - begin) > processedEntries) {
419 "RDataFrame stopped processing after %lld entries, whereas an entry range (begin=%lld,end=%lld) was "
420 "requested. Consider adjusting the end value of the entry range to a maximum of %lld.",
421 processedEntries, begin, end, begin + processedEntries);
431 assert(
fTree &&
"The internal TTree is not available, something went wrong.");
435 return fTree->GetCurrentFile() ? 1 : 0;
440 assert(
fTree &&
"The internal TTree is not available, something went wrong.");
441 const auto treeName =
fTree->GetName();
442 const auto isTChain =
dynamic_cast<TChain *
>(
fTree.get()) ? true :
false;
443 const auto treeType = isTChain ?
"TChain" :
"TTree";
444 const auto isInMemory = !isTChain && !
fTree->GetCurrentFile() ? true :
false;
446 const auto hasFriends = friendInfo.fFriendNames.empty() ? false :
true;
447 std::stringstream ss;
448 ss <<
"Dataframe from " << treeType;
449 if (*treeName != 0) {
450 ss <<
" " << treeName;
453 ss <<
" (in-memory)";
456 const auto numFiles = files.size();
458 ss <<
" in file " << files[0];
461 for (
auto i = 0u; i < numFiles; i++) {
462 ss <<
" " << files[i];
463 if (i < numFiles - 1)
469 const auto numFriends = friendInfo.fFriendNames.size();
470 if (numFriends == 1) {
471 ss <<
"\nwith friend\n";
473 ss <<
"\nwith friends\n";
475 for (
auto i = 0u; i < numFriends; i++) {
476 const auto nameAlias = friendInfo.fFriendNames[i];
477 const auto files = friendInfo.fFriendFileNames[i];
478 const auto numFiles = files.size();
479 const auto subnames = friendInfo.fFriendChainSubNames[i];
480 ss <<
" " << nameAlias.first;
481 if (nameAlias.first != nameAlias.second)
482 ss <<
" (" << nameAlias.second <<
")";
485 ss <<
" " << files[0];
490 for (
auto j = 0u; j < numFiles; j++) {
491 ss <<
" " << subnames[j] <<
" " << files[j];
492 if (j < numFiles - 1)
496 if (i < numFriends - 1)
503std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
517 std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase> colReader;
519 if (ti ==
typeid(
void)) {
520 colReader = std::make_unique<ROOT::Internal::RDF::RTreeOpaqueColumnReader>(*treeReader, col);
523 if (
auto &&[toConvert, innerTypeName, collType] = GetCollectionInfo(typeName); toConvert)
524 colReader = std::make_unique<ROOT::Internal::RDF::RTreeUntypedArrayColumnReader>(*treeReader, col,
525 innerTypeName, collType);
527 colReader = std::make_unique<ROOT::Internal::RDF::RTreeUntypedValueColumnReader>(*treeReader, col, typeName);
531 using Map_t = std::unordered_map<std::string, std::pair<std::string, unsigned int>>;
532 const std::string bitmaskMapName =
533 std::string{
"R_rdf_column_to_bitmask_mapping_"} + treeReader->
GetTree()->
GetName();
534 if (Map_t
const *columnMaskMap = treeDir->Get<Map_t>(bitmaskMapName.c_str()); columnMaskMap) {
535 if (
auto it = columnMaskMap->find(std::string(col)); it != columnMaskMap->end()) {
536 colReader = std::make_unique<RMaskedColumnReader>(*treeReader, std::move(colReader), it->second.first,
557 std::vector<std::string> split;
560 auto &valueType = split[1];
561 colTypeName =
"ROOT::VecOps::RVec<" + valueType +
">";
570 std::vector<std::string> split;
573 auto &valueType = split[1];
574 colTypeName =
"ROOT::VecOps::RVec<" + valueType +
">";
586 return std::vector<std::pair<ULong64_t, ULong64_t>>{{rangeBegin, rangeEnd}};
598 auto treeOffsets = chain.GetTreeOffset();
599 auto treeNumber = chain.GetTreeNumber();
600 const ULong64_t thisTreeBegin = treeOffsets[treeNumber];
601 const ULong64_t thisTreeEnd = treeOffsets[treeNumber + 1];
607 return std::vector<std::pair<ULong64_t, ULong64_t>>{{rangeBegin, rangeEnd}};
612 assert(
fTreeReader &&
"TTreeReader is not available, this should never happen.");
614 assert(treeOrChain &&
"Could not retrieve TTree from TTreeReader, something went wrong.");
617 if (
fTreeReader->GetCurrentEntry() >= treeOrChain->GetEntriesFast() - 1 ||
626 if (
auto chain =
dynamic_cast<TChain *
>(treeOrChain)) {
650 throw std::logic_error(
"Something went wrong in initializing the TTreeReader.");
658 fTreeReader->SetSuppressErrorsForMissingBranches(suppressErrorsForMissingBranches);
665 throw std::runtime_error(
"An error was encountered while processing the data. TTreeReader status code is: " +
long long Long64_t
Portable signed long integer 8 bytes.
unsigned long long ULong64_t
Portable unsigned long integer 8 bytes.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
The head node of a RDF computation graph.
const std::set< std::string > & GetSuppressErrorsForMissingBranches() const
void TTreeThreadTask(TTreeReader &treeReader, ROOT::Internal::RSlotStack &slotStack, std::atomic< ULong64_t > &entryCount)
The task run by every thread on an entry range (known by the input TTreeReader), for the TTree data s...
std::vector< std::pair< ULong64_t, ULong64_t > > GetTTreeEntryRange(TTree &tree)
std::vector< std::string > fBranchNamesWithoutDuplicates
std::size_t GetNFiles() const final
Returns the number of files from which the dataset is constructed.
std::string DescribeDataset() final
std::unique_ptr< ROOT::Internal::TreeUtils::RNoCleanupNotifier > fNoCleanupNotifier
void Setup(std::shared_ptr< TTree > &&tree, const ROOT::TreeUtils::RFriendInfo *friendInfo=nullptr)
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *treeReader) final
Creates a column reader for the requested column.
std::vector< std::unique_ptr< TChain > > fFriends
void Initialize() final
Convenience method called before starting an event-loop.
void RunFinalChecks(bool nodesLeftNotRun) const final
ROOT::RDF::RSampleInfo CreateSampleInfo(unsigned int, const std::unordered_map< std::string, ROOT::RDF::Experimental::RSample * > &sampleMap) const final
std::unique_ptr< TTreeReader > fTreeReader
void Finalize() final
Convenience method called after concluding an event-loop.
bool SetEntry(unsigned int, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
void InitializeWithOpts(const std::set< std::string > &suppressErrorsForMissingBranches) final
std::vector< std::string > fTopLevelBranchNames
std::vector< std::pair< ULong64_t, ULong64_t > > GetTChainEntryRange(TChain &chain)
void ProcessMT(ROOT::Detail::RDF::RLoopManager &lm) final
std::vector< std::string > fBranchNamesWithDuplicates
RTTreeDS(std::shared_ptr< TTree > tree)
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
std::shared_ptr< TTree > fTree
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
std::string GetTypeNameWithOpts(std::string_view colName, bool vector2RVec) const final
A thread-safe list of N indexes (0 to size - 1).
std::optional< std::pair< ULong64_t, ULong64_t > > fGlobalEntryRange
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
A Branch for the case of an object.
A TTree is a list of TBranches.
TClass * IsA() const override
TObjArray * GetListOfLeaves()
A chain is a collection of files containing TTree objects.
Describe directory structure in memory.
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
A TFriendElement TF describes a TTree object TF in a file.
A TLeaf describes individual elements of a TBranch. See TBranch structure in TTree.
const char * GetName() const override
Returns name of object.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
@ kIndexedFriendNoMatch
A friend with TTreeIndex doesn't have an entry for this index.
@ kMissingBranchWhenSwitchingTree
A branch was not found when switching to the next TTree in the chain.
@ kEntryBeyondEnd
last entry loop has reached its end
@ kEntryValid
data read okay
A TTree represents a columnar dataset.
virtual TBranch * FindBranch(const char *name)
Return the branch that corresponds to the path 'branchname', which can include the name of the tree or...
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
TDirectory * GetDirectory() const
virtual Long64_t GetEntries() const
virtual TObjArray * GetListOfBranches()
virtual TTree * GetTree() const
virtual TList * GetListOfFriends() const
virtual const char * GetFriendAlias(TTree *) const
If the 'tree' is a friend, this method returns its alias name.
std::vector< std::string > GetTreeFullPaths(const TTree &tree)
Retrieve the full path(s) to a TTree or the trees in a TChain.
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
Get and store the file names associated with the input tree.
auto MakeAliasedSharedPtr(T *rawPtr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
ROOT::RDataFrame FromTTree(std::string_view treeName, std::string_view fileNameGlob)
std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
Return the typename of object colName stored in t, if any.
std::vector< std::string > GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
std::unique_ptr< TChain > MakeChainForMT(const std::string &name="", const std::string &title="")
Create a TChain object with options that avoid common causes of thread contention.
std::vector< std::unique_ptr< TChain > > MakeFriends(const ROOT::TreeUtils::RFriendInfo &finfo)
Create friends from the main TTree.
ROOT::TreeUtils::RFriendInfo GetFriendInfo(const TTree &tree, bool retrieveEntries=false)
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
int GetSplit(const char *type, std::vector< std::string > &output, int &nestedLoc, EModType mode=TClassEdit::kNone)
Stores in output (after emptying it) the split type.
Information about friend trees of a certain TTree or TChain object.
std::vector< std::pair< std::string, std::string > > fFriendNames
Pairs of names and aliases of each friend tree/chain.