Logo ROOT  
Reference Guide
InternalTreeUtils.cxx
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
3 * All rights reserved. *
4 * *
5 * For the licensing terms see $ROOTSYS/LICENSE. *
6 * For the list of contributors see $ROOTSYS/README/CREDITS. *
7 *************************************************************************/
8
10#include "TBranch.h" // Usage of TBranch in ClearMustCleanupBits
11#include "TChain.h"
12#include "TCollection.h" // TRangeStaticCast
13#include "TFile.h"
14#include "TFriendElement.h"
15#include "TTree.h"
16
17#include <utility> // std::pair
18#include <vector>
19#include <stdexcept> // std::runtime_error
20#include <string>
21
22// Recursively get the top level branches from the specified tree and all of its attached friends.
23static void GetTopLevelBranchNamesImpl(TTree &t, std::unordered_set<std::string> &bNamesReg, std::vector<std::string> &bNames,
24 std::unordered_set<TTree *> &analysedTrees, const std::string friendName = "")
25{
26 if (!analysedTrees.insert(&t).second) {
27 return;
28 }
29
30 auto branches = t.GetListOfBranches();
31 if (branches) {
32 for (auto branchObj : *branches) {
33 const auto name = branchObj->GetName();
34 if (bNamesReg.insert(name).second) {
35 bNames.emplace_back(name);
36 } else if (!friendName.empty()) {
37 // If this is a friend and the branch name has already been inserted, it might be because the friend
38 // has a branch with the same name as a branch in the main tree. Let's add it as <friendname>.<branchname>.
39 const auto longName = friendName + "." + name;
40 if (bNamesReg.insert(longName).second)
41 bNames.emplace_back(longName);
42 }
43 }
44 }
45
46 auto friendTrees = t.GetListOfFriends();
47
48 if (!friendTrees)
49 return;
50
51 for (auto friendTreeObj : *friendTrees) {
52 auto friendElement = static_cast<TFriendElement *>(friendTreeObj);
53 auto friendTree = friendElement->GetTree();
54 const std::string frName(friendElement->GetName()); // this gets us the TTree name or the friend alias if any
55 GetTopLevelBranchNamesImpl(*friendTree, bNamesReg, bNames, analysedTrees, frName);
56 }
57}
58
59namespace ROOT {
60namespace Internal {
61namespace TreeUtils {
62
63///////////////////////////////////////////////////////////////////////////////
64/// Get all the top-level branches names, including the ones of the friend trees
65std::vector<std::string> GetTopLevelBranchNames(TTree &t)
66{
67 std::unordered_set<std::string> bNamesSet;
68 std::vector<std::string> bNames;
69 std::unordered_set<TTree *> analysedTrees;
70 GetTopLevelBranchNamesImpl(t, bNamesSet, bNames, analysedTrees);
71 return bNames;
72}
73
74////////////////////////////////////////////////////////////////////////////////
75/// \fn std::vector<std::string> GetFileNamesFromTree(const TTree &tree)
76/// \ingroup tree
77/// \brief Get and store the file names associated with the input tree.
78/// \param[in] tree The tree from which friends information will be gathered.
79/// \throws std::runtime_error If no files could be associated with the input tree.
80std::vector<std::string> GetFileNamesFromTree(const TTree &tree)
81{
82 std::vector<std::string> filenames;
83
84 // If the input tree is a TChain, traverse its list of associated files.
85 if (auto chain = dynamic_cast<const TChain *>(&tree)) {
86 const auto *chainFiles = chain->GetListOfFiles();
87 if (!chainFiles) {
88 throw std::runtime_error("Could not retrieve a list of files from the input TChain.");
89 }
90 // Store this in a variable so it can be later used in `filenames.reserve`
91 // if it passes the check.
92 const auto nfiles = chainFiles->GetEntries();
93 if (nfiles == 0) {
94 throw std::runtime_error("The list of files associated with the input TChain is empty.");
95 }
96 filenames.reserve(nfiles);
97 for (const auto *f : *chainFiles)
98 filenames.emplace_back(f->GetTitle());
99 } else {
100 const TFile *f = tree.GetCurrentFile();
101 if (!f) {
102 throw std::runtime_error("The input TTree is not linked to any file, "
103 "in-memory-only trees are not supported.");
104 }
105
106 filenames.emplace_back(f->GetName());
107 }
108
109 return filenames;
110}
111
112////////////////////////////////////////////////////////////////////////////////
113/// \fn RFriendInfo GetFriendInfo(const TTree &tree)
114/// \ingroup tree
115/// \brief Get and store the names, aliases and file names of the direct friends of the tree.
116/// \param[in] tree The tree from which friends information will be gathered.
117/// \throws std::runtime_error If the input tree has a list of friends, but any
118/// of them could not be associated with any file.
119///
120/// Calls TTree::GetListOfFriends and parses its result for the names, aliases
121/// and file names, with different methodologies depending on whether the
122/// parameter is a TTree or a TChain.
123///
124/// \note This function only retrieves information about <b>direct friends</b>
125/// of the input tree. It will not recurse through friends of friends and
126/// does not take into account circular references in the list of friends
127/// of the input tree.
128///
129/// \returns An RFriendInfo struct, containing the information parsed from the
130/// list of friends. The struct will contain three vectors, which elements at
131/// position `i` represent the `i`-th friend of the input tree. If this friend
132/// is a TTree, the `i`-th element of each of the three vectors will contain
133/// respectively:
134///
135/// - A pair with the name and alias of the tree (the alias might not be
136/// present, in which case it will be just an empty string).
137/// - A vector with a single string representing the path to current file where
138/// the tree is stored.
139/// - An empty vector.
140///
141/// If the `i`-th friend is a TChain instead, the `i`-th element of each of the
142/// three vectors will contain respectively:
143/// - A pair with the name and alias of the chain (if present, both might be
144/// empty strings).
145/// - A vector with all the paths to the files contained in the chain.
146/// - A vector with all the names of the trees making up the chain,
147/// associated with the file names of the previous vector.
149{
150 std::vector<std::pair<std::string, std::string>> friendNames;
151 std::vector<std::vector<std::string>> friendFileNames;
152 std::vector<std::vector<std::string>> friendChainSubNames;
153
154 // Typically, the correct way to call GetListOfFriends would be `tree.GetTree()->GetListOfFriends()`
155 // (see e.g. the discussion at https://github.com/root-project/root/issues/6741).
156 // However, in this case, in case we are dealing with a TChain we really only care about the TChain's
157 // list of friends (which will need to be rebuilt in each processing task) while friends of the TChain's
158 // internal TTree, if any, will be automatically loaded in each task just like they would be automatically
159 // loaded here if we used tree.GetTree()->GetListOfFriends().
160 const auto *friends = tree.GetListOfFriends();
161 if (!friends)
163
164 for (auto fr : *friends) {
165 // Can't pass fr as const TObject* because TFriendElement::GetTree is not const.
166 // Also, we can't retrieve frTree as const TTree* because of TTree::GetFriendAlias(TTree *) a few lines later
167 auto frTree = static_cast<TFriendElement *>(fr)->GetTree();
168
169 // The vector of (name,alias) pairs of the current friend
170 friendFileNames.emplace_back();
171 auto &fileNames = friendFileNames.back();
172
173 // The vector of names of sub trees of the current friend, if it is a TChain.
174 // Otherwise, just an empty vector.
175 friendChainSubNames.emplace_back();
176 auto &chainSubNames = friendChainSubNames.back();
177
178 // Check if friend tree/chain has an alias
179 const auto *alias_c = tree.GetFriendAlias(frTree);
180 const std::string alias = alias_c != nullptr ? alias_c : "";
181
182 // If the current tree is a TChain
183 if (auto frChain = dynamic_cast<const TChain *>(frTree)) {
184 // Note that each TChainElement returned by TChain::GetListOfFiles has a name
185 // equal to the tree name of this TChain and a title equal to the filename.
186 // Accessing the information like this ensures that we get the correct
187 // filenames and treenames if the treename is given as part of the filename
188 // via chain.AddFile(file.root/myTree) and as well if the tree name is given
189 // in the constructor via TChain(myTree) and a file is added later by chain.AddFile(file.root).
190 // Caveat: The chain may be made of sub-trees with different names. All
191 // tree names need to be retrieved separately, see below.
192
193 // Get filelist of the current chain
194 const auto *chainFiles = frChain->GetListOfFiles();
195 if (!chainFiles || chainFiles->GetEntries() == 0) {
196 throw std::runtime_error("A TChain in the list of friends does not contain any file. "
197 "Friends with no associated files are not supported.");
198 }
199
200 // Retrieve the name of the chain and add a (name, alias) pair
201 friendNames.emplace_back(std::make_pair(frChain->GetName(), alias));
202 // Each file in the chain can contain a TTree with a different name wrt
203 // the main TChain. Retrieve the name of the file through `GetTitle`
204 // and the name of the tree through `GetName`
205 for (const auto *f : *chainFiles) {
206 chainSubNames.emplace_back(f->GetName());
207 fileNames.emplace_back(f->GetTitle());
208 }
209 } else {
210 // Get name of the tree
211 const auto realName = GetTreeFullPaths(*frTree)[0];
212 friendNames.emplace_back(std::make_pair(realName, alias));
213
214 // Get filename
215 const auto *f = frTree->GetCurrentFile();
216 if (!f)
217 throw std::runtime_error("A TTree in the list of friends is not linked to any file. "
218 "Friends with no associated files are not supported.");
219 fileNames.emplace_back(f->GetName());
220 }
221 }
222
223 return ROOT::TreeUtils::RFriendInfo{std::move(friendNames), std::move(friendFileNames),
224 std::move(friendChainSubNames)};
225}
226
227////////////////////////////////////////////////////////////////////////////////
228/// \fn std::vector<std::string> GetTreeFullPaths(const TTree &tree)
229/// \ingroup tree
230/// \brief Retrieve the full path(s) to a TTree or the trees in a TChain.
231/// \param[in] tree The tree or chain from which the paths will be retrieved.
232/// \throws std::runtime_error If the input tree is a TChain but no files could
233/// be found associated with it.
234/// \return If the input argument is a TChain, returns a vector of strings with
235/// the name of the tree of each file in the chain. If the input
236/// argument is a TTree, returns a vector with a single element that is
237/// the full path of the tree in the file (e.g. the name of the tree
238/// itself or the path with the directories inside the file). Finally,
239/// the function returns a vector with just the name of the tree if it
240/// couldn't do any better.
241std::vector<std::string> GetTreeFullPaths(const TTree &tree)
242{
243 // Case 1: this is a TChain. For each file it contains, GetName returns the name of the tree in that file
244 if (auto chain = dynamic_cast<const TChain *>(&tree)) {
245 const auto *chainFiles = chain->GetListOfFiles();
246 if (!chainFiles || chainFiles->GetEntries() == 0) {
247 throw std::runtime_error("The input TChain does not contain any file.");
248 }
249 std::vector<std::string> treeNames;
250 for (const auto *f : *chainFiles)
251 treeNames.emplace_back(f->GetName());
252
253 return treeNames;
254 }
255
256 // Case 2: this is a TTree: we get the full path of it
257 if (const auto *treeDir = tree.GetDirectory()) {
258 // We have 2 subcases (ROOT-9948):
259 // - 1. treeDir is a TFile: return the name of the tree.
260 // - 2. treeDir is a directory: reconstruct the path to the tree in the directory.
261 // Use dynamic_cast to check whether the directory is a TFile
262 if (dynamic_cast<const TFile *>(treeDir)) {
263 return {tree.GetName()};
264 }
265 std::string fullPath = treeDir->GetPath(); // e.g. "file.root:/dir"
266 fullPath = fullPath.substr(fullPath.rfind(":/") + 1); // e.g. "/dir"
267 fullPath += "/";
268 fullPath += tree.GetName(); // e.g. "/dir/tree"
269 return {fullPath};
270 }
271
272 // We do our best and return the name of the tree
273 return {tree.GetName()};
274}
275
276/// Reset the kMustCleanup bit of a TObjArray of TBranch objects (e.g. returned by TTree::GetListOfBranches).
277///
278/// In some rare cases, all branches in a TTree can have their kMustCleanup bit set, which causes a large amount
279/// of contention at teardown due to concurrent calls to RecursiveRemove (which needs to take the global lock).
280/// This helper function checks the first branch of the array and if it has the kMustCleanup bit set, it resets
281/// it for all branches in the array, recursively going through sub-branches and leaves.
283{
284 if (branches.GetEntries() == 0 || branches.At(0)->TestBit(kMustCleanup) == false)
285 return; // we assume either no branches have the bit set, or all do. we never encountered an hybrid case
286
287 for (auto *branch : ROOT::Detail::TRangeStaticCast<TBranch>(branches)) {
288 branch->ResetBit(kMustCleanup);
289 TObjArray *subBranches = branch->GetListOfBranches();
290 ClearMustCleanupBits(*subBranches);
291 TObjArray *leaves = branch->GetListOfLeaves();
292 if (leaves->GetEntries() > 0 && leaves->At(0)->TestBit(kMustCleanup) == true) {
293 for (TObject *leaf : *leaves)
294 leaf->ResetBit(kMustCleanup);
295 }
296 }
297}
298
299/// \brief Create a TChain object with options that avoid common causes of thread contention.
300///
301/// In particular, set its kWithoutGlobalRegistration mode and reset its kMustCleanup bit.
302std::unique_ptr<TChain> MakeChainForMT(const std::string &name, const std::string &title)
303{
304 auto c = std::make_unique<TChain>(name.c_str(), title.c_str(), TChain::kWithoutGlobalRegistration);
305 c->ResetBit(TObject::kMustCleanup);
306 return c;
307}
308
309} // namespace TreeUtils
310} // namespace Internal
311} // namespace ROOT
static void GetTopLevelBranchNamesImpl(TTree &t, std::unordered_set< std::string > &bNamesReg, std::vector< std::string > &bNames, std::unordered_set< TTree * > &analysedTrees, const std::string friendName="")
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
char name[80]
Definition: TGX11.cxx:110
@ kMustCleanup
Definition: TObject.h:370
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
@ kWithoutGlobalRegistration
Definition: TChain.h:70
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
A TFriendElement TF describes a TTree object TF in a file.
virtual TTree * GetTree()
Return pointer to friend TTree.
An array of TObjects.
Definition: TObjArray.h:31
Int_t GetEntries() const override
Return the number of objects in array (i.e.
Definition: TObjArray.cxx:523
TObject * At(Int_t idx) const override
Definition: TObjArray.h:164
Mother of all ROOT objects.
Definition: TObject.h:41
R__ALWAYS_INLINE Bool_t TestBit(UInt_t f) const
Definition: TObject.h:201
void ResetBit(UInt_t f)
Definition: TObject.h:200
@ kMustCleanup
if object destructor must call RecursiveRemove()
Definition: TObject.h:64
A TTree represents a columnar dataset.
Definition: TTree.h:79
virtual TObjArray * GetListOfBranches()
Definition: TTree.h:484
virtual TList * GetListOfFriends() const
Definition: TTree.h:486
Different standalone functions to work with trees and tuples, not required to be a member of any cla...
std::vector< std::string > GetTreeFullPaths(const TTree &tree)
Retrieve the full path(s) to a TTree or the trees in a TChain.
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
Get and store the file names associated with the input tree.
ROOT::TreeUtils::RFriendInfo GetFriendInfo(const TTree &tree)
Get and store the names, aliases and file names of the direct friends of the tree.
std::vector< std::string > GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
std::unique_ptr< TChain > MakeChainForMT(const std::string &name="", const std::string &title="")
Create a TChain object with options that avoid common causes of thread contention.
void ClearMustCleanupBits(TObjArray &arr)
Reset the kMustCleanup bit of a TObjArray of TBranch objects (e.g.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Definition: tree.py:1
Information about friend trees of a certain TTree or TChain object.
Definition: RFriendInfo.hxx:34