Logo ROOT  
Reference Guide
InternalTreeUtils.cxx
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
3 * All rights reserved. *
4 * *
5 * For the licensing terms see $ROOTSYS/LICENSE. *
6 * For the list of contributors see $ROOTSYS/README/CREDITS. *
7 *************************************************************************/
8
10#include "TBranch.h" // Usage of TBranch in ClearMustCleanupBits
11#include "TChain.h"
12#include "TCollection.h" // TRangeStaticCast
13#include "TFile.h"
14#include "TFriendElement.h"
15#include "TTree.h"
16
17#include <utility> // std::pair
18#include <vector>
19#include <stdexcept> // std::runtime_error
20#include <string>
21
22namespace ROOT {
23namespace Internal {
24namespace TreeUtils {
25
26////////////////////////////////////////////////////////////////////////////////
27/// \brief Add information of a single friend.
28///
29/// \param[in] treeName Name of the tree.
30/// \param[in] fileNameGlob Path to the file. Refer to TChain::Add for globbing rules.
31/// \param[in] alias Alias for this friend.
32void RFriendInfo::AddFriend(const std::string &treeName, const std::string &fileNameGlob, const std::string &alias)
33{
34 fFriendNames.emplace_back(std::make_pair(treeName, alias));
35 fFriendFileNames.emplace_back(std::vector<std::string>{fileNameGlob});
36 fFriendChainSubNames.emplace_back();
37}
38
39////////////////////////////////////////////////////////////////////////////////
40/// \brief Add information of a single friend.
41///
42/// \param[in] treeName Name of the tree.
43/// \param[in] fileNameGlobs Paths to the files. Refer to TChain::Add for globbing rules.
44/// \param[in] alias Alias for this friend.
45void RFriendInfo::AddFriend(const std::string &treeName, const std::vector<std::string> &fileNameGlobs,
46 const std::string &alias)
47{
48 fFriendNames.emplace_back(std::make_pair(treeName, alias));
49 fFriendFileNames.emplace_back(fileNameGlobs);
50 fFriendChainSubNames.emplace_back(std::vector<std::string>(fileNameGlobs.size(), treeName));
51}
52
53////////////////////////////////////////////////////////////////////////////////
54/// \brief Add information of a single friend.
55///
56/// \param[in] treeAndFileNameGlobs Pairs of (treename, filename). Refer to TChain::Add for globbing rules.
57/// \param[in] alias Alias for this friend.
58void RFriendInfo::AddFriend(const std::vector<std::pair<std::string, std::string>> &treeAndFileNameGlobs,
59 const std::string &alias)
60{
61 fFriendNames.emplace_back(std::make_pair("", alias));
62
63 fFriendFileNames.emplace_back();
64 fFriendChainSubNames.emplace_back();
65
66 auto &theseFileNames = fFriendFileNames.back();
67 auto &theseChainSubNames = fFriendChainSubNames.back();
68 auto nPairs = treeAndFileNameGlobs.size();
69 theseFileNames.reserve(nPairs);
70 theseChainSubNames.reserve(nPairs);
71
72 auto fSubNamesIt = std::back_inserter(theseFileNames);
73 auto fNamesIt = std::back_inserter(theseChainSubNames);
74
75 for (const auto &names : treeAndFileNameGlobs) {
76 *fSubNamesIt = names.first;
77 *fNamesIt = names.second;
78 }
79}
80
81////////////////////////////////////////////////////////////////////////////////
82/// \fn std::vector<std::string> GetFileNamesFromTree(const TTree &tree)
83/// \ingroup tree
84/// \brief Get and store the file names associated with the input tree.
85/// \param[in] tree The tree from which friends information will be gathered.
86/// \throws std::runtime_error If no files could be associated with the input tree.
87std::vector<std::string> GetFileNamesFromTree(const TTree &tree)
88{
89 std::vector<std::string> filenames;
90
91 // If the input tree is a TChain, traverse its list of associated files.
92 if (auto chain = dynamic_cast<const TChain *>(&tree)) {
93 const auto *chainFiles = chain->GetListOfFiles();
94 if (!chainFiles) {
95 throw std::runtime_error("Could not retrieve a list of files from the input TChain.");
96 }
97 // Store this in a variable so it can be later used in `filenames.reserve`
98 // if it passes the check.
99 const auto nfiles = chainFiles->GetEntries();
100 if (nfiles == 0) {
101 throw std::runtime_error("The list of files associated with the input TChain is empty.");
102 }
103 filenames.reserve(nfiles);
104 for (const auto *f : *chainFiles)
105 filenames.emplace_back(f->GetTitle());
106 } else {
107 const TFile *f = tree.GetCurrentFile();
108 if (!f) {
109 throw std::runtime_error("The input TTree is not linked to any file, "
110 "in-memory-only trees are not supported.");
111 }
112
113 filenames.emplace_back(f->GetName());
114 }
115
116 return filenames;
117}
118
119////////////////////////////////////////////////////////////////////////////////
120/// \fn RFriendInfo GetFriendInfo(const TTree &tree)
121/// \ingroup tree
122/// \brief Get and store the names, aliases and file names of the direct friends of the tree.
123/// \param[in] tree The tree from which friends information will be gathered.
124/// \throws std::runtime_error If the input tree has a list of friends, but any
125/// of them could not be associated with any file.
126///
127/// Calls TTree::GetListOfFriends and parses its result for the names, aliases
128/// and file names, with different methodologies depending on whether the
129/// parameter is a TTree or a TChain.
130///
131/// \note This function only retrieves information about <b>direct friends</b>
132/// of the input tree. It will not recurse through friends of friends and
133/// does not take into account circular references in the list of friends
134/// of the input tree.
135///
136/// \returns An RFriendInfo struct, containing the information parsed from the
137/// list of friends. The struct will contain three vectors, which elements at
138/// position `i` represent the `i`-th friend of the input tree. If this friend
139/// is a TTree, the `i`-th element of each of the three vectors will contain
140/// respectively:
141///
142/// - A pair with the name and alias of the tree (the alias might not be
143/// present, in which case it will be just an empty string).
144/// - A vector with a single string representing the path to current file where
145/// the tree is stored.
146/// - An empty vector.
147///
148/// If the `i`-th friend is a TChain instead, the `i`-th element of each of the
149/// three vectors will contain respectively:
150/// - A pair with the name and alias of the chain (if present, both might be
151/// empty strings).
152/// - A vector with all the paths to the files contained in the chain.
153/// - A vector with all the the names of the trees making up the chain,
154/// associated with the file names of the previous vector.
156{
157 std::vector<NameAlias> friendNames;
158 std::vector<std::vector<std::string>> friendFileNames;
159 std::vector<std::vector<std::string>> friendChainSubNames;
160
161 // Typically, the correct way to call GetListOfFriends would be `tree.GetTree()->GetListOfFriends()`
162 // (see e.g. the discussion at https://github.com/root-project/root/issues/6741).
163 // However, in this case, in case we are dealing with a TChain we really only care about the TChain's
164 // list of friends (which will need to be rebuilt in each processing task) while friends of the TChain's
165 // internal TTree, if any, will be automatically loaded in each task just like they would be automatically
166 // loaded here if we used tree.GetTree()->GetListOfFriends().
167 const auto *friends = tree.GetListOfFriends();
168 if (!friends)
169 return RFriendInfo();
170
171 for (auto fr : *friends) {
172 // Can't pass fr as const TObject* because TFriendElement::GetTree is not const.
173 // Also, we can't retrieve frTree as const TTree* because of TTree::GetFriendAlias(TTree *) a few lines later
174 auto frTree = static_cast<TFriendElement *>(fr)->GetTree();
175
176 // The vector of (name,alias) pairs of the current friend
177 friendFileNames.emplace_back();
178 auto &fileNames = friendFileNames.back();
179
180 // The vector of names of sub trees of the current friend, if it is a TChain.
181 // Otherwise, just an empty vector.
182 friendChainSubNames.emplace_back();
183 auto &chainSubNames = friendChainSubNames.back();
184
185 // Check if friend tree/chain has an alias
186 const auto *alias_c = tree.GetFriendAlias(frTree);
187 const std::string alias = alias_c != nullptr ? alias_c : "";
188
189 // If the current tree is a TChain
190 if (auto frChain = dynamic_cast<const TChain *>(frTree)) {
191 // Note that each TChainElement returned by TChain::GetListOfFiles has a name
192 // equal to the tree name of this TChain and a title equal to the filename.
193 // Accessing the information like this ensures that we get the correct
194 // filenames and treenames if the treename is given as part of the filename
195 // via chain.AddFile(file.root/myTree) and as well if the tree name is given
196 // in the constructor via TChain(myTree) and a file is added later by chain.AddFile(file.root).
197 // Caveat: The chain may be made of sub-trees with different names. All
198 // tree names need to be retrieved separately, see below.
199
200 // Get filelist of the current chain
201 const auto *chainFiles = frChain->GetListOfFiles();
202 if (!chainFiles || chainFiles->GetEntries() == 0) {
203 throw std::runtime_error("A TChain in the list of friends does not contain any file. "
204 "Friends with no associated files are not supported.");
205 }
206
207 // Retrieve the name of the chain and add a (name, alias) pair
208 friendNames.emplace_back(std::make_pair(frChain->GetName(), alias));
209 // Each file in the chain can contain a TTree with a different name wrt
210 // the main TChain. Retrieve the name of the file through `GetTitle`
211 // and the name of the tree through `GetName`
212 for (const auto *f : *chainFiles) {
213 chainSubNames.emplace_back(f->GetName());
214 fileNames.emplace_back(f->GetTitle());
215 }
216 } else {
217 // Get name of the tree
218 const auto realName = GetTreeFullPaths(*frTree)[0];
219 friendNames.emplace_back(std::make_pair(realName, alias));
220
221 // Get filename
222 const auto *f = frTree->GetCurrentFile();
223 if (!f)
224 throw std::runtime_error("A TTree in the list of friends is not linked to any file. "
225 "Friends with no associated files are not supported.");
226 fileNames.emplace_back(f->GetName());
227 }
228 }
229
230 return RFriendInfo{std::move(friendNames), std::move(friendFileNames), std::move(friendChainSubNames)};
231}
232
233////////////////////////////////////////////////////////////////////////////////
234/// \fn std::vector<std::string> GetTreeFullPaths(const TTree &tree)
235/// \ingroup tree
236/// \brief Retrieve the full path(s) to a TTree or the trees in a TChain.
237/// \param[in] tree The tree or chain from which the paths will be retrieved.
238/// \throws std::runtime_error If the input tree is a TChain but no files could
239/// be found associated with it.
240/// \return If the input argument is a TChain, returns a vector of strings with
241/// the name of the tree of each file in the chain. If the input
242/// argument is a TTree, returns a vector with a single element that is
243/// the full path of the tree in the file (e.g. the name of the tree
244/// itself or the path with the directories inside the file). Finally,
245/// the function returns a vector with just the name of the tree if it
246/// couldn't do any better.
247std::vector<std::string> GetTreeFullPaths(const TTree &tree)
248{
249 // Case 1: this is a TChain. For each file it contains, GetName returns the name of the tree in that file
250 if (auto chain = dynamic_cast<const TChain *>(&tree)) {
251 const auto *chainFiles = chain->GetListOfFiles();
252 if (!chainFiles || chainFiles->GetEntries() == 0) {
253 throw std::runtime_error("The input TChain does not contain any file.");
254 }
255 std::vector<std::string> treeNames;
256 for (const auto *f : *chainFiles)
257 treeNames.emplace_back(f->GetName());
258
259 return treeNames;
260 }
261
262 // Case 2: this is a TTree: we get the full path of it
263 if (const auto *treeDir = tree.GetDirectory()) {
264 // We have 2 subcases (ROOT-9948):
265 // - 1. treeDir is a TFile: return the name of the tree.
266 // - 2. treeDir is a directory: reconstruct the path to the tree in the directory.
267 // Use dynamic_cast to check whether the directory is a TFile
268 if (dynamic_cast<const TFile *>(treeDir)) {
269 return {tree.GetName()};
270 }
271 std::string fullPath = treeDir->GetPath(); // e.g. "file.root:/dir"
272 fullPath = fullPath.substr(fullPath.rfind(":/") + 1); // e.g. "/dir"
273 fullPath += "/";
274 fullPath += tree.GetName(); // e.g. "/dir/tree"
275 return {fullPath};
276 }
277
278 // We do our best and return the name of the tree
279 return {tree.GetName()};
280}
281
282/// Reset the kMustCleanup bit of a TObjArray of TBranch objects (e.g. returned by TTree::GetListOfBranches).
283///
284/// In some rare cases, all branches in a TTree can have their kMustCleanup bit set, which causes a large amount
285/// of contention at teardown due to concurrent calls to RecursiveRemove (which needs to take the global lock).
286/// This helper function checks the first branch of the array and if it has the kMustCleanup bit set, it resets
287/// it for all branches in the array, recursively going through sub-branches and leaves.
289{
290 if (branches.GetEntries() == 0 || branches.At(0)->TestBit(kMustCleanup) == false)
291 return; // we assume either no branches have the bit set, or all do. we never encountered an hybrid case
292
293 for (auto *branch : ROOT::Detail::TRangeStaticCast<TBranch>(branches)) {
294 branch->ResetBit(kMustCleanup);
295 TObjArray *subBranches = branch->GetListOfBranches();
296 ClearMustCleanupBits(*subBranches);
297 TObjArray *leaves = branch->GetListOfLeaves();
298 if (leaves->GetEntries() > 0 && leaves->At(0)->TestBit(kMustCleanup) == true) {
299 for (TObject *leaf : *leaves)
300 leaf->ResetBit(kMustCleanup);
301 }
302 }
303}
304
305} // namespace TreeUtils
306} // namespace Internal
307} // namespace ROOT
#define f(i)
Definition: RSha256.hxx:104
@ kMustCleanup
Definition: TObject.h:355
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
A TFriendElement TF describes a TTree object TF in a file.
An array of TObjects.
Definition: TObjArray.h:31
Int_t GetEntries() const override
Return the number of objects in array (i.e.
Definition: TObjArray.cxx:523
TObject * At(Int_t idx) const override
Definition: TObjArray.h:164
Mother of all ROOT objects.
Definition: TObject.h:37
R__ALWAYS_INLINE Bool_t TestBit(UInt_t f) const
Definition: TObject.h:187
void ResetBit(UInt_t f)
Definition: TObject.h:186
A TTree represents a columnar dataset.
Definition: TTree.h:79
Different standalone functions to work with trees and tuples, not required to be a member of any cla...
std::vector< std::string > GetTreeFullPaths(const TTree &tree)
Retrieve the full path(s) to a TTree or the trees in a TChain.
RFriendInfo GetFriendInfo(const TTree &tree)
Get and store the names, aliases and file names of the direct friends of the tree.
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
Get and store the file names associated with the input tree.
void ClearMustCleanupBits(TObjArray &arr)
Reset the kMustCleanup bit of a TObjArray of TBranch objects (e.g.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Definition: tree.py:1
Information about friend trees of a certain TTree or TChain object.
std::vector< NameAlias > fFriendNames
Pairs of names and aliases of friend trees/chains.
std::vector< std::vector< std::string > > fFriendFileNames
Names of the files where each friend is stored.
void AddFriend(const std::string &treeName, const std::string &fileNameGlob, const std::string &alias="")
Add information of a single friend.
std::vector< std::vector< std::string > > fFriendChainSubNames
Names of the subtrees of a friend TChain.