Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
InternalTreeUtils.cxx
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
3 * All rights reserved. *
4 * *
5 * For the licensing terms see $ROOTSYS/LICENSE. *
6 * For the list of contributors see $ROOTSYS/README/CREDITS. *
7 *************************************************************************/
8
10#include "TBranch.h" // Usage of TBranch in ClearMustCleanupBits
11#include "TChain.h"
12#include "TCollection.h" // TRangeStaticCast
13#include "TFile.h"
14#include "TFriendElement.h"
15#include "TTree.h"
16#include "TVirtualIndex.h"
17
18#include <cstdint> // std::uint64_t
19#include <limits>
20#include <utility> // std::pair
21#include <vector>
22#include <stdexcept> // std::runtime_error
23#include <string>
24
25// Recursively get the top level branches from the specified tree and all of its attached friends.
26static void GetTopLevelBranchNamesImpl(TTree &t, std::unordered_set<std::string> &bNamesReg, std::vector<std::string> &bNames,
27 std::unordered_set<TTree *> &analysedTrees, const std::string friendName = "")
28{
29 if (!analysedTrees.insert(&t).second) {
30 return;
31 }
32
33 auto branches = t.GetListOfBranches();
34 if (branches) {
35 for (auto branchObj : *branches) {
36 const auto name = branchObj->GetName();
37 if (bNamesReg.insert(name).second) {
38 bNames.emplace_back(name);
39 } else if (!friendName.empty()) {
40 // If this is a friend and the branch name has already been inserted, it might be because the friend
41 // has a branch with the same name as a branch in the main tree. Let's add it as <friendname>.<branchname>.
42 const auto longName = friendName + "." + name;
43 if (bNamesReg.insert(longName).second)
44 bNames.emplace_back(longName);
45 }
46 }
47 }
48
49 auto friendTrees = t.GetListOfFriends();
50
51 if (!friendTrees)
52 return;
53
54 for (auto friendTreeObj : *friendTrees) {
55 auto friendElement = static_cast<TFriendElement *>(friendTreeObj);
56 auto friendTree = friendElement->GetTree();
57 const std::string frName(friendElement->GetName()); // this gets us the TTree name or the friend alias if any
58 GetTopLevelBranchNamesImpl(*friendTree, bNamesReg, bNames, analysedTrees, frName);
59 }
60}
61
62namespace ROOT {
63namespace Internal {
64namespace TreeUtils {
65
66///////////////////////////////////////////////////////////////////////////////
67/// Get all the top-level branches names, including the ones of the friend trees
68std::vector<std::string> GetTopLevelBranchNames(TTree &t)
69{
70 std::unordered_set<std::string> bNamesSet;
71 std::vector<std::string> bNames;
72 std::unordered_set<TTree *> analysedTrees;
73 GetTopLevelBranchNamesImpl(t, bNamesSet, bNames, analysedTrees);
74 return bNames;
75}
76
77////////////////////////////////////////////////////////////////////////////////
78/// \fn std::vector<std::string> GetFileNamesFromTree(const TTree &tree)
79/// \ingroup tree
80/// \brief Get and store the file names associated with the input tree.
81/// \param[in] tree The tree from which friends information will be gathered.
82/// \throws std::runtime_error If no files could be associated with the input tree.
83std::vector<std::string> GetFileNamesFromTree(const TTree &tree)
84{
85 std::vector<std::string> filenames;
86
87 // If the input tree is a TChain, traverse its list of associated files.
88 if (auto chain = dynamic_cast<const TChain *>(&tree)) {
89 const auto *chainFiles = chain->GetListOfFiles();
90 if (!chainFiles) {
91 throw std::runtime_error("Could not retrieve a list of files from the input TChain.");
92 }
93 // Store this in a variable so it can be later used in `filenames.reserve`
94 // if it passes the check.
95 const auto nfiles = chainFiles->GetEntries();
96 if (nfiles == 0) {
97 throw std::runtime_error("The list of files associated with the input TChain is empty.");
98 }
99 filenames.reserve(nfiles);
100 for (const auto *f : *chainFiles)
101 filenames.emplace_back(f->GetTitle());
102 } else {
103 const TFile *f = tree.GetCurrentFile();
104 if (!f) {
105 throw std::runtime_error("The input TTree is not linked to any file, "
106 "in-memory-only trees are not supported.");
107 }
108
109 filenames.emplace_back(f->GetName());
110 }
111
112 return filenames;
113}
114
115////////////////////////////////////////////////////////////////////////////////
116/// \fn RFriendInfo GetFriendInfo(const TTree &tree)
117/// \ingroup tree
118/// \brief Get and store the names, aliases and file names of the direct friends of the tree.
119/// \param[in] tree The tree from which friends information will be gathered.
120/// \param[in] retrieveEntries Whether to also retrieve the number of entries in
121/// each tree of each friend: one if the friend is a TTree, more if
122/// the friend is a TChain. In the latter case, this function
123/// triggers the opening of all files in the chain.
124/// \throws std::runtime_error If the input tree has a list of friends, but any
125/// of them could not be associated with any file.
126///
127/// Calls TTree::GetListOfFriends and parses its result for the names, aliases
128/// and file names, with different methodologies depending on whether the
129/// parameter is a TTree or a TChain.
130///
131/// \note This function only retrieves information about <b>direct friends</b>
132/// of the input tree. It will not recurse through friends of friends and
133/// does not take into account circular references in the list of friends
134/// of the input tree.
135///
136/// \returns An RFriendInfo struct, containing the information parsed from the
137/// list of friends. The struct will contain four vectors, which elements at
138/// position `i` represent the `i`-th friend of the input tree. If this friend
139/// is a TTree, the `i`-th element of each of the three vectors will contain
140/// respectively:
141///
142/// - A pair with the name and alias of the tree (the alias might not be
143/// present, in which case it will be just an empty string).
144/// - A vector with a single string representing the path to current file where
145/// the tree is stored.
146/// - An empty vector.
147/// - A vector with a single element, the number of entries in the tree.
148///
149/// If the `i`-th friend is a TChain instead, the `i`-th element of each of the
150/// three vectors will contain respectively:
151/// - A pair with the name and alias of the chain (if present, both might be
152/// empty strings).
153/// - A vector with all the paths to the files contained in the chain.
154/// - A vector with all the names of the trees making up the chain,
155/// associated with the file names of the previous vector.
156/// - A vector with the number of entries of each tree in the previous vector or
157/// an empty vector, depending on whether \p retrieveEntries is true.
159{
160 // Typically, the correct way to call GetListOfFriends would be `tree.GetTree()->GetListOfFriends()`
161 // (see e.g. the discussion at https://github.com/root-project/root/issues/6741).
162 // However, in this case, in case we are dealing with a TChain we really only care about the TChain's
163 // list of friends (which will need to be rebuilt in each processing task) while friends of the TChain's
164 // internal TTree, if any, will be automatically loaded in each task just like they would be automatically
165 // loaded here if we used tree.GetTree()->GetListOfFriends().
166 const auto *friends = tree.GetListOfFriends();
167 if (!friends || friends->GetEntries() == 0)
169
170 std::vector<std::pair<std::string, std::string>> friendNames;
171 std::vector<std::vector<std::string>> friendFileNames;
172 std::vector<std::vector<std::string>> friendChainSubNames;
173 std::vector<std::vector<std::int64_t>> nEntriesPerTreePerFriend;
174 std::vector<std::unique_ptr<TVirtualIndex>> treeIndexes;
175
176 // Reserve space for all friends
177 auto nFriends = friends->GetEntries();
178 friendNames.reserve(nFriends);
179 friendFileNames.reserve(nFriends);
180 friendChainSubNames.reserve(nFriends);
181 nEntriesPerTreePerFriend.reserve(nFriends);
182
183 for (auto fr : *friends) {
184 // Can't pass fr as const TObject* because TFriendElement::GetTree is not const.
185 // Also, we can't retrieve frTree as const TTree* because of TTree::GetFriendAlias(TTree *) a few lines later
186 auto frTree = static_cast<TFriendElement *>(fr)->GetTree();
187
188 // The vector of (name,alias) pairs of the current friend
189 friendFileNames.emplace_back();
190 auto &fileNames = friendFileNames.back();
191
192 // The vector of names of sub trees of the current friend, if it is a TChain.
193 // Otherwise, just an empty vector.
194 friendChainSubNames.emplace_back();
195 auto &chainSubNames = friendChainSubNames.back();
196
197 // The vector of entries in each tree of the current friend.
198 nEntriesPerTreePerFriend.emplace_back();
199 auto &nEntriesInThisFriend = nEntriesPerTreePerFriend.back();
200
201 // Check if friend tree/chain has an alias
202 const auto *alias_c = tree.GetFriendAlias(frTree);
203 const std::string alias = alias_c != nullptr ? alias_c : "";
204
205 auto *treeIndex = frTree->GetTreeIndex();
206 treeIndexes.emplace_back(static_cast<TVirtualIndex *>(treeIndex ? treeIndex->Clone() : nullptr));
207
208 // If the current tree is a TChain
209 if (auto frChain = dynamic_cast<const TChain *>(frTree)) {
210 // Note that each TChainElement returned by TChain::GetListOfFiles has a name
211 // equal to the tree name of this TChain and a title equal to the filename.
212 // Accessing the information like this ensures that we get the correct
213 // filenames and treenames if the treename is given as part of the filename
214 // via chain.AddFile(file.root/myTree) and as well if the tree name is given
215 // in the constructor via TChain(myTree) and a file is added later by chain.AddFile(file.root).
216 // Caveat: The chain may be made of sub-trees with different names. All
217 // tree names need to be retrieved separately, see below.
218
219 // Get filelist of the current chain
220 const auto *chainFiles = frChain->GetListOfFiles();
221 if (!chainFiles || chainFiles->GetEntries() == 0) {
222 throw std::runtime_error("A TChain in the list of friends does not contain any file. "
223 "Friends with no associated files are not supported.");
224 }
225
226 // Reserve space for this friend
227 auto nFiles = chainFiles->GetEntries();
228 fileNames.reserve(nFiles);
229 chainSubNames.reserve(nFiles);
230 nEntriesInThisFriend.reserve(nFiles);
231
232 // Retrieve the name of the chain and add a (name, alias) pair
233 friendNames.emplace_back(std::make_pair(frChain->GetName(), alias));
234 // Each file in the chain can contain a TTree with a different name wrt
235 // the main TChain. Retrieve the name of the file through `GetTitle`
236 // and the name of the tree through `GetName`
237 for (const auto *f : *chainFiles) {
238
239 auto thisTreeName = f->GetName();
240 auto thisFileName = f->GetTitle();
241
242 chainSubNames.emplace_back(thisTreeName);
243 fileNames.emplace_back(thisFileName);
244
245 if (retrieveEntries) {
246 std::unique_ptr<TFile> thisFile{TFile::Open(thisFileName, "READ_WITHOUT_GLOBALREGISTRATION")};
247 if (!thisFile || thisFile->IsZombie())
248 throw std::runtime_error(std::string("GetFriendInfo: Could not open file \"") + thisFileName + "\"");
249 TTree *thisTree = thisFile->Get<TTree>(thisTreeName);
250 if (!thisTree)
251 throw std::runtime_error(std::string("GetFriendInfo: Could not retrieve TTree \"") + thisTreeName +
252 "\" from file \"" + thisFileName + "\"");
253 nEntriesInThisFriend.emplace_back(thisTree->GetEntries());
254 } else {
255 // Avoid odr-using TTree::kMaxEntries which would require a
256 // definition in C++14. In C++17, all constexpr static data
257 // members are implicitly inline.
258 static constexpr auto maxEntries = TTree::kMaxEntries;
259 nEntriesInThisFriend.emplace_back(maxEntries);
260 }
261 }
262 } else {
263 // Get name of the tree
264 const auto realName = GetTreeFullPaths(*frTree)[0];
265 friendNames.emplace_back(std::make_pair(realName, alias));
266
267 // Get filename
268 const auto *f = frTree->GetCurrentFile();
269 if (!f)
270 throw std::runtime_error("A TTree in the list of friends is not linked to any file. "
271 "Friends with no associated files are not supported.");
272 fileNames.emplace_back(f->GetName());
273 // We already have a pointer to the file and the tree, we can get the
274 // entries without triggering a re-open
275 nEntriesInThisFriend.emplace_back(frTree->GetEntries());
276 }
277 }
278
279 return ROOT::TreeUtils::RFriendInfo(std::move(friendNames), std::move(friendFileNames),
280 std::move(friendChainSubNames), std::move(nEntriesPerTreePerFriend),
281 std::move(treeIndexes));
282}
283
284////////////////////////////////////////////////////////////////////////////////
285/// \fn std::vector<std::string> GetTreeFullPaths(const TTree &tree)
286/// \ingroup tree
287/// \brief Retrieve the full path(s) to a TTree or the trees in a TChain.
288/// \param[in] tree The tree or chain from which the paths will be retrieved.
289/// \throws std::runtime_error If the input tree is a TChain but no files could
290/// be found associated with it.
291/// \return If the input argument is a TChain, returns a vector of strings with
292/// the name of the tree of each file in the chain. If the input
293/// argument is a TTree, returns a vector with a single element that is
294/// the full path of the tree in the file (e.g. the name of the tree
295/// itself or the path with the directories inside the file). Finally,
296/// the function returns a vector with just the name of the tree if it
297/// couldn't do any better.
298std::vector<std::string> GetTreeFullPaths(const TTree &tree)
299{
300 // Case 1: this is a TChain. For each file it contains, GetName returns the name of the tree in that file
301 if (auto chain = dynamic_cast<const TChain *>(&tree)) {
302 const auto *chainFiles = chain->GetListOfFiles();
303 if (!chainFiles || chainFiles->GetEntries() == 0) {
304 throw std::runtime_error("The input TChain does not contain any file.");
305 }
306 std::vector<std::string> treeNames;
307 for (const auto *f : *chainFiles)
308 treeNames.emplace_back(f->GetName());
309
310 return treeNames;
311 }
312
313 // Case 2: this is a TTree: we get the full path of it
314 if (const auto *treeDir = tree.GetDirectory()) {
315 // We have 2 subcases (ROOT-9948):
316 // - 1. treeDir is a TFile: return the name of the tree.
317 // - 2. treeDir is a directory: reconstruct the path to the tree in the directory.
318 // Use dynamic_cast to check whether the directory is a TFile
319 if (dynamic_cast<const TFile *>(treeDir)) {
320 return {tree.GetName()};
321 }
322 std::string fullPath = treeDir->GetPath(); // e.g. "file.root:/dir"
323 fullPath = fullPath.substr(fullPath.rfind(":/") + 1); // e.g. "/dir"
324 fullPath += "/";
325 fullPath += tree.GetName(); // e.g. "/dir/tree"
326 return {fullPath};
327 }
328
329 // We do our best and return the name of the tree
330 return {tree.GetName()};
331}
332
333/// Reset the kMustCleanup bit of a TObjArray of TBranch objects (e.g. returned by TTree::GetListOfBranches).
334///
335/// In some rare cases, all branches in a TTree can have their kMustCleanup bit set, which causes a large amount
336/// of contention at teardown due to concurrent calls to RecursiveRemove (which needs to take the global lock).
337/// This helper function checks the first branch of the array and if it has the kMustCleanup bit set, it resets
338/// it for all branches in the array, recursively going through sub-branches and leaves.
340{
341 if (branches.GetEntries() == 0 || branches.At(0)->TestBit(kMustCleanup) == false)
342 return; // we assume either no branches have the bit set, or all do. we never encountered an hybrid case
343
344 for (auto *branch : ROOT::Detail::TRangeStaticCast<TBranch>(branches)) {
345 branch->ResetBit(kMustCleanup);
346 TObjArray *subBranches = branch->GetListOfBranches();
347 ClearMustCleanupBits(*subBranches);
348 TObjArray *leaves = branch->GetListOfLeaves();
349 if (leaves->GetEntries() > 0 && leaves->At(0)->TestBit(kMustCleanup) == true) {
350 for (TObject *leaf : *leaves)
351 leaf->ResetBit(kMustCleanup);
352 }
353 }
354}
355
356/// \brief Create a TChain object with options that avoid common causes of thread contention.
357///
358/// In particular, set its kWithoutGlobalRegistration mode and reset its kMustCleanup bit.
359std::unique_ptr<TChain> MakeChainForMT(const std::string &name, const std::string &title)
360{
361 auto c = std::make_unique<TChain>(name.c_str(), title.c_str(), TChain::kWithoutGlobalRegistration);
362 c->ResetBit(TObject::kMustCleanup);
363 return c;
364}
365
366////////////////////////////////////////////////////////////////////////////////
367/// \brief Create friends from the main TTree.
368std::vector<std::unique_ptr<TChain>> MakeFriends(const ROOT::TreeUtils::RFriendInfo &finfo)
369{
370 std::vector<std::unique_ptr<TChain>> friends;
371 const auto nFriends = finfo.fFriendNames.size();
372 friends.reserve(nFriends);
373
374 for (std::size_t i = 0u; i < nFriends; ++i) {
375 const auto &thisFriendName = finfo.fFriendNames[i].first;
376 const auto &thisFriendFileNames = finfo.fFriendFileNames[i];
377 const auto &thisFriendChainSubNames = finfo.fFriendChainSubNames[i];
378 const auto &thisFriendEntries = finfo.fNEntriesPerTreePerFriend[i];
379
380 // Build a friend chain
381 auto frChain = ROOT::Internal::TreeUtils::MakeChainForMT(thisFriendName);
382 if (thisFriendChainSubNames.empty()) {
383 // The friend is a TTree. It's safe to add to the chain the filename directly.
384 frChain->Add(thisFriendFileNames[0].c_str(), thisFriendEntries[0]);
385 } else {
386 // Otherwise, the new friend chain needs to be built using the nomenclature
387 // "filename?#treename" as argument to `TChain::Add`
388 for (std::size_t j = 0u; j < thisFriendFileNames.size(); ++j) {
389 frChain->Add((thisFriendFileNames[j] + "?#" + thisFriendChainSubNames[j]).c_str(), thisFriendEntries[j]);
390 }
391 }
392
393 auto &treeIndex = finfo.fTreeIndexInfos[i];
394 if (treeIndex) {
395 auto *copyOfIndex = static_cast<TVirtualIndex *>(treeIndex->Clone());
396 copyOfIndex->SetTree(frChain.get());
397 frChain->SetTreeIndex(copyOfIndex);
398 }
399
400 friends.emplace_back(std::move(frChain));
401 }
402
403 return friends;
404}
405
406} // namespace TreeUtils
407} // namespace Internal
408} // namespace ROOT
static void GetTopLevelBranchNamesImpl(TTree &t, std::unordered_set< std::string > &bNamesReg, std::vector< std::string > &bNames, std::unordered_set< TTree * > &analysedTrees, const std::string friendName="")
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
char name[80]
Definition TGX11.cxx:110
@ kMustCleanup
Definition TObject.h:370
A chain is a collection of files containing TTree objects.
Definition TChain.h:33
@ kWithoutGlobalRegistration
Definition TChain.h:71
const char * GetName() const override
Return name of this collection.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition TFile.h:51
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4053
A TFriendElement TF describes a TTree object TF in a file.
virtual TTree * GetTree()
Return pointer to friend TTree.
An array of TObjects.
Definition TObjArray.h:31
Int_t GetEntries() const override
Return the number of objects in array (i.e.
TObject * At(Int_t idx) const override
Definition TObjArray.h:164
Mother of all ROOT objects.
Definition TObject.h:41
R__ALWAYS_INLINE Bool_t TestBit(UInt_t f) const
Definition TObject.h:201
void ResetBit(UInt_t f)
Definition TObject.h:200
@ kMustCleanup
if object destructor must call RecursiveRemove()
Definition TObject.h:64
A TTree represents a columnar dataset.
Definition TTree.h:79
virtual Long64_t GetEntries() const
Definition TTree.h:460
virtual TObjArray * GetListOfBranches()
Definition TTree.h:485
virtual TList * GetListOfFriends() const
Definition TTree.h:487
static constexpr Long64_t kMaxEntries
Definition TTree.h:226
Abstract interface for Tree Index.
virtual void SetTree(TTree *T)=0
Different standalone functions to work with trees and tuples, not required to be a member of any cla...
std::vector< std::string > GetTreeFullPaths(const TTree &tree)
std::vector< std::string > GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
std::unique_ptr< TChain > MakeChainForMT(const std::string &name="", const std::string &title="")
Create a TChain object with options that avoid common causes of thread contention.
std::vector< std::unique_ptr< TChain > > MakeFriends(const ROOT::TreeUtils::RFriendInfo &finfo)
Create friends from the main TTree.
ROOT::TreeUtils::RFriendInfo GetFriendInfo(const TTree &tree, bool retrieveEntries=false)
void ClearMustCleanupBits(TObjArray &arr)
Reset the kMustCleanup bit of a TObjArray of TBranch objects (e.g.
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Definition tree.py:1
Information about friend trees of a certain TTree or TChain object.
std::vector< std::pair< std::string, std::string > > fFriendNames
Pairs of names and aliases of each friend tree/chain.
std::vector< std::vector< std::int64_t > > fNEntriesPerTreePerFriend
Number of entries contained in each tree of each friend.
std::vector< std::vector< std::string > > fFriendChainSubNames
Names of the subtrees of a friend TChain.
std::vector< std::unique_ptr< TVirtualIndex > > fTreeIndexInfos
Information on the friend's TTreeIndexes.
std::vector< std::vector< std::string > > fFriendFileNames
Names of the files where each friend is stored.