Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
InternalTreeUtils.cxx
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
3 * All rights reserved. *
4 * *
5 * For the licensing terms see $ROOTSYS/LICENSE. *
6 * For the list of contributors see $ROOTSYS/README/CREDITS. *
7 *************************************************************************/
8
10#include "ROOT/RRangeCast.hxx" // RRangeStaticCast
11#include "TBranch.h" // Usage of TBranch in ClearMustCleanupBits
12#include "TChain.h"
13#include "TCollection.h" // TRangeStaticCast
14#include "TDirectory.h" // TDirectory::TContext
15#include "TFile.h"
16#include "TFriendElement.h"
17#include "TObjString.h"
18#include "TRegexp.h"
19#include "TString.h"
20#include "TSystem.h"
21#include "TSystemFile.h"
22#include "TTree.h"
23#include "TVirtualIndex.h"
24
25#include <limits>
26#include <utility> // std::pair
27#include <vector>
28#include <stdexcept> // std::runtime_error
29#include <string>
30
31// Recursively get the top level branches from the specified tree and all of its attached friends.
32static void GetTopLevelBranchNamesImpl(TTree &t, std::unordered_set<std::string> &bNamesReg, std::vector<std::string> &bNames,
33 std::unordered_set<TTree *> &analysedTrees, const std::string friendName = "")
34{
35 if (!analysedTrees.insert(&t).second) {
36 return;
37 }
38
39 auto branches = t.GetListOfBranches();
40 if (branches) {
41 for (auto branchObj : *branches) {
42 const auto name = branchObj->GetName();
43 if (bNamesReg.insert(name).second) {
44 bNames.emplace_back(name);
45 } else if (!friendName.empty()) {
46 // If this is a friend and the branch name has already been inserted, it might be because the friend
47 // has a branch with the same name as a branch in the main tree. Let's add it as <friendname>.<branchname>.
48 const auto longName = friendName + "." + name;
49 if (bNamesReg.insert(longName).second)
50 bNames.emplace_back(longName);
51 }
52 }
53 }
54
55 auto friendTrees = t.GetListOfFriends();
56
57 if (!friendTrees)
58 return;
59
60 for (auto friendTreeObj : *friendTrees) {
61 auto friendElement = static_cast<TFriendElement *>(friendTreeObj);
62 auto friendTree = friendElement->GetTree();
63 const std::string frName(friendElement->GetName()); // this gets us the TTree name or the friend alias if any
64 GetTopLevelBranchNamesImpl(*friendTree, bNamesReg, bNames, analysedTrees, frName);
65 }
66}
67
68namespace ROOT {
69namespace Internal {
70namespace TreeUtils {
71
72///////////////////////////////////////////////////////////////////////////////
73/// Get all the top-level branches names, including the ones of the friend trees
74std::vector<std::string> GetTopLevelBranchNames(TTree &t)
75{
76 std::unordered_set<std::string> bNamesSet;
77 std::vector<std::string> bNames;
78 std::unordered_set<TTree *> analysedTrees;
79 GetTopLevelBranchNamesImpl(t, bNamesSet, bNames, analysedTrees);
80 return bNames;
81}
82
83////////////////////////////////////////////////////////////////////////////////
84/// \fn std::vector<std::string> GetFileNamesFromTree(const TTree &tree)
85/// \ingroup tree
86/// \brief Get and store the file names associated with the input tree.
87/// \param[in] tree The tree from which friends information will be gathered.
88/// \throws std::runtime_error If no files could be associated with the input tree.
89std::vector<std::string> GetFileNamesFromTree(const TTree &tree)
90{
91 std::vector<std::string> filenames;
92
93 // If the input tree is a TChain, traverse its list of associated files.
94 if (auto chain = dynamic_cast<const TChain *>(&tree)) {
95 const auto *chainFiles = chain->GetListOfFiles();
96 if (!chainFiles) {
97 throw std::runtime_error("Could not retrieve a list of files from the input TChain.");
98 }
99 // Store this in a variable so it can be later used in `filenames.reserve`
100 // if it passes the check.
101 const auto nfiles = chainFiles->GetEntries();
102 if (nfiles == 0) {
103 throw std::runtime_error("The list of files associated with the input TChain is empty.");
104 }
105 filenames.reserve(nfiles);
106 for (const auto *f : *chainFiles)
107 filenames.emplace_back(f->GetTitle());
108 } else {
109 const TFile *f = tree.GetCurrentFile();
110 if (!f) {
111 throw std::runtime_error("The input TTree is not linked to any file, "
112 "in-memory-only trees are not supported.");
113 }
114
115 filenames.emplace_back(f->GetName());
116 }
117
118 return filenames;
119}
120
121////////////////////////////////////////////////////////////////////////////////
122/// \fn RFriendInfo GetFriendInfo(const TTree &tree)
123/// \ingroup tree
124/// \brief Get and store the names, aliases and file names of the direct friends of the tree.
125/// \param[in] tree The tree from which friends information will be gathered.
126/// \param[in] retrieveEntries Whether to also retrieve the number of entries in
127/// each tree of each friend: one if the friend is a TTree, more if
128/// the friend is a TChain. In the latter case, this function
129/// triggers the opening of all files in the chain.
130/// \throws std::runtime_error If the input tree has a list of friends, but any
131/// of them could not be associated with any file.
132///
133/// Calls TTree::GetListOfFriends and parses its result for the names, aliases
134/// and file names, with different methodologies depending on whether the
135/// parameter is a TTree or a TChain.
136///
137/// \note This function only retrieves information about <b>direct friends</b>
138/// of the input tree. It will not recurse through friends of friends and
139/// does not take into account circular references in the list of friends
140/// of the input tree.
141///
142/// \returns An RFriendInfo struct, containing the information parsed from the
143/// list of friends. The struct will contain four vectors, which elements at
144/// position `i` represent the `i`-th friend of the input tree. If this friend
145/// is a TTree, the `i`-th element of each of the three vectors will contain
146/// respectively:
147///
148/// - A pair with the name and alias of the tree (the alias might not be
149/// present, in which case it will be just an empty string).
150/// - A vector with a single string representing the path to current file where
151/// the tree is stored.
152/// - An empty vector.
153/// - A vector with a single element, the number of entries in the tree.
154///
155/// If the `i`-th friend is a TChain instead, the `i`-th element of each of the
156/// three vectors will contain respectively:
157/// - A pair with the name and alias of the chain (if present, both might be
158/// empty strings).
159/// - A vector with all the paths to the files contained in the chain.
160/// - A vector with all the names of the trees making up the chain,
161/// associated with the file names of the previous vector.
162/// - A vector with the number of entries of each tree in the previous vector or
163/// an empty vector, depending on whether \p retrieveEntries is true.
164ROOT::TreeUtils::RFriendInfo GetFriendInfo(const TTree &tree, bool retrieveEntries)
165{
166 // Typically, the correct way to call GetListOfFriends would be `tree.GetTree()->GetListOfFriends()`
167 // (see e.g. the discussion at https://github.com/root-project/root/issues/6741).
168 // However, in this case, in case we are dealing with a TChain we really only care about the TChain's
169 // list of friends (which will need to be rebuilt in each processing task) while friends of the TChain's
170 // internal TTree, if any, will be automatically loaded in each task just like they would be automatically
171 // loaded here if we used tree.GetTree()->GetListOfFriends().
172 const auto *friends = tree.GetListOfFriends();
173 if (!friends || friends->GetEntries() == 0)
175
176 std::vector<std::pair<std::string, std::string>> friendNames;
177 std::vector<std::vector<std::string>> friendFileNames;
178 std::vector<std::vector<std::string>> friendChainSubNames;
179 std::vector<std::vector<Long64_t>> nEntriesPerTreePerFriend;
180 std::vector<std::unique_ptr<TVirtualIndex>> treeIndexes;
181
182 // Reserve space for all friends
183 auto nFriends = friends->GetEntries();
184 friendNames.reserve(nFriends);
185 friendFileNames.reserve(nFriends);
186 friendChainSubNames.reserve(nFriends);
187 nEntriesPerTreePerFriend.reserve(nFriends);
188
189 for (auto fr : *friends) {
190 // Can't pass fr as const TObject* because TFriendElement::GetTree is not const.
191 // Also, we can't retrieve frTree as const TTree* because of TTree::GetFriendAlias(TTree *) a few lines later
192 auto frTree = static_cast<TFriendElement *>(fr)->GetTree();
193
194 // The vector of (name,alias) pairs of the current friend
195 friendFileNames.emplace_back();
196 auto &fileNames = friendFileNames.back();
197
198 // The vector of names of sub trees of the current friend, if it is a TChain.
199 // Otherwise, just an empty vector.
200 friendChainSubNames.emplace_back();
201 auto &chainSubNames = friendChainSubNames.back();
202
203 // The vector of entries in each tree of the current friend.
204 nEntriesPerTreePerFriend.emplace_back();
205 auto &nEntriesInThisFriend = nEntriesPerTreePerFriend.back();
206
207 // Check if friend tree/chain has an alias
208 const auto *alias_c = tree.GetFriendAlias(frTree);
209 const std::string alias = alias_c != nullptr ? alias_c : "";
210
211 auto *treeIndex = frTree->GetTreeIndex();
212 treeIndexes.emplace_back(static_cast<TVirtualIndex *>(treeIndex ? treeIndex->Clone() : nullptr));
213
214 // If the friend tree is a TChain
215 if (auto frChain = dynamic_cast<const TChain *>(frTree)) {
216 // Note that each TChainElement returned by TChain::GetListOfFiles has a name
217 // equal to the tree name of this TChain and a title equal to the filename.
218 // Accessing the information like this ensures that we get the correct
219 // filenames and treenames if the treename is given as part of the filename
220 // via chain.AddFile(file.root/myTree) and as well if the tree name is given
221 // in the constructor via TChain(myTree) and a file is added later by chain.AddFile(file.root).
222 // Caveat: The chain may be made of sub-trees with different names. All
223 // tree names need to be retrieved separately, see below.
224
225 // Get filelist of the current chain
226 const auto *chainFiles = frChain->GetListOfFiles();
227 if (!chainFiles || chainFiles->GetEntries() == 0) {
228 throw std::runtime_error("A TChain in the list of friends does not contain any file. "
229 "Friends with no associated files are not supported.");
230 }
231
232 // Reserve space for this friend
233 auto nFiles = chainFiles->GetEntries();
234 fileNames.reserve(nFiles);
235 chainSubNames.reserve(nFiles);
236 nEntriesInThisFriend.reserve(nFiles);
237
238 // Retrieve the name of the chain and add a (name, alias) pair
239 friendNames.emplace_back(std::make_pair(frChain->GetName(), alias));
240 // Each file in the chain can contain a TTree with a different name wrt
241 // the main TChain. Retrieve the name of the file through `GetTitle`
242 // and the name of the tree through `GetName`
243 for (const auto *f : *chainFiles) {
244
245 auto thisTreeName = f->GetName();
246 auto thisFileName = f->GetTitle();
247
248 chainSubNames.emplace_back(thisTreeName);
249 fileNames.emplace_back(thisFileName);
250
251 if (retrieveEntries) {
252 std::unique_ptr<TFile> thisFile{TFile::Open(thisFileName, "READ_WITHOUT_GLOBALREGISTRATION")};
253 if (!thisFile || thisFile->IsZombie())
254 throw std::runtime_error(std::string("GetFriendInfo: Could not open file \"") + thisFileName + "\"");
255 TTree *thisTree = thisFile->Get<TTree>(thisTreeName);
256 if (!thisTree)
257 throw std::runtime_error(std::string("GetFriendInfo: Could not retrieve TTree \"") + thisTreeName +
258 "\" from file \"" + thisFileName + "\"");
259 nEntriesInThisFriend.emplace_back(thisTree->GetEntries());
260 } else {
261 // Avoid odr-using TTree::kMaxEntries which would require a
262 // definition in C++14. In C++17, all constexpr static data
263 // members are implicitly inline.
264 static constexpr auto maxEntries = TTree::kMaxEntries;
265 nEntriesInThisFriend.emplace_back(maxEntries);
266 }
267 }
268 } else { // frTree is not a chain but a simple TTree
269 // Get name of the tree
270 const auto realName = GetTreeFullPaths(*frTree)[0];
271 friendNames.emplace_back(std::make_pair(realName, alias));
272
273 // Get filename
274 const auto *f = frTree->GetCurrentFile();
275 if (!f)
276 throw std::runtime_error("A TTree in the list of friends is not linked to any file. "
277 "Friends with no associated files are not supported.");
278 fileNames.emplace_back(f->GetName());
279 // We already have a pointer to the file and the tree, we can get the
280 // entries without triggering a re-open
281 nEntriesInThisFriend.emplace_back(frTree->GetEntries());
282 }
283 }
284
285 return ROOT::TreeUtils::RFriendInfo(std::move(friendNames), std::move(friendFileNames),
286 std::move(friendChainSubNames), std::move(nEntriesPerTreePerFriend),
287 std::move(treeIndexes));
288}
289
290////////////////////////////////////////////////////////////////////////////////
291/// \fn std::vector<std::string> GetTreeFullPaths(const TTree &tree)
292/// \ingroup tree
293/// \brief Retrieve the full path(s) to a TTree or the trees in a TChain.
294/// \param[in] tree The tree or chain from which the paths will be retrieved.
295/// \throws std::runtime_error If the input tree is a TChain but no files could
296/// be found associated with it.
297/// \return If the input argument is a TChain, returns a vector of strings with
298/// the name of the tree of each file in the chain. If the input
299/// argument is a TTree, returns a vector with a single element that is
300/// the full path of the tree in the file (e.g. the name of the tree
301/// itself or the path with the directories inside the file). Finally,
302/// the function returns a vector with just the name of the tree if it
303/// couldn't do any better.
304std::vector<std::string> GetTreeFullPaths(const TTree &tree)
305{
306 // Case 1: this is a TChain. For each file it contains, GetName returns the name of the tree in that file
307 if (auto chain = dynamic_cast<const TChain *>(&tree)) {
308 const auto *chainFiles = chain->GetListOfFiles();
309 if (!chainFiles || chainFiles->GetEntries() == 0) {
310 throw std::runtime_error("The input TChain does not contain any file.");
311 }
312 std::vector<std::string> treeNames;
313 for (const auto *f : *chainFiles)
314 treeNames.emplace_back(f->GetName());
315
316 return treeNames;
317 }
318
319 // Case 2: this is a TTree: we get the full path of it
320 if (const auto *treeDir = tree.GetDirectory()) {
321 // We have 2 subcases (ROOT-9948):
322 // - 1. treeDir is a TFile: return the name of the tree.
323 // - 2. treeDir is a directory: reconstruct the path to the tree in the directory.
324 // Use dynamic_cast to check whether the directory is a TFile
325 if (dynamic_cast<const TFile *>(treeDir)) {
326 return {tree.GetName()};
327 }
328 std::string fullPath = treeDir->GetPath(); // e.g. "file.root:/dir"
329 fullPath = fullPath.substr(fullPath.rfind(":/") + 1); // e.g. "/dir"
330 fullPath += '/';
331 fullPath += tree.GetName(); // e.g. "/dir/tree"
332 return {fullPath};
333 }
334
335 // We do our best and return the name of the tree
336 return {tree.GetName()};
337}
338
339/// Reset the kMustCleanup bit of a TObjArray of TBranch objects (e.g. returned by TTree::GetListOfBranches).
340///
341/// In some rare cases, all branches in a TTree can have their kMustCleanup bit set, which causes a large amount
342/// of contention at teardown due to concurrent calls to RecursiveRemove (which needs to take the global lock).
343/// This helper function checks the first branch of the array and if it has the kMustCleanup bit set, it resets
344/// it for all branches in the array, recursively going through sub-branches and leaves.
346{
347 if (branches.GetEntries() == 0 || branches.At(0)->TestBit(kMustCleanup) == false)
348 return; // we assume either no branches have the bit set, or all do. we never encountered an hybrid case
349
350 for (auto *branch : ROOT::Detail::TRangeStaticCast<TBranch>(branches)) {
351 branch->ResetBit(kMustCleanup);
352 TObjArray *subBranches = branch->GetListOfBranches();
353 ClearMustCleanupBits(*subBranches);
354 TObjArray *leaves = branch->GetListOfLeaves();
355 if (leaves->GetEntries() > 0 && leaves->At(0)->TestBit(kMustCleanup) == true) {
356 for (TObject *leaf : *leaves)
357 leaf->ResetBit(kMustCleanup);
358 }
359 }
360}
361
362/// \brief Create a TChain object with options that avoid common causes of thread contention.
363///
364/// In particular, set its kWithoutGlobalRegistration mode and reset its kMustCleanup bit.
365std::unique_ptr<TChain> MakeChainForMT(const std::string &name, const std::string &title)
366{
367 auto c = std::make_unique<TChain>(name.c_str(), title.c_str(), TChain::kWithoutGlobalRegistration);
368 c->ResetBit(TObject::kMustCleanup);
369 return c;
370}
371
372////////////////////////////////////////////////////////////////////////////////
373/// \brief Create friends from the main TTree.
374std::vector<std::unique_ptr<TChain>> MakeFriends(const ROOT::TreeUtils::RFriendInfo &finfo)
375{
376 std::vector<std::unique_ptr<TChain>> friends;
377 const auto nFriends = finfo.fFriendNames.size();
378 friends.reserve(nFriends);
379
380 for (std::size_t i = 0u; i < nFriends; ++i) {
381 const auto &thisFriendName = finfo.fFriendNames[i].first;
382 const auto &thisFriendFileNames = finfo.fFriendFileNames[i];
383 const auto &thisFriendChainSubNames = finfo.fFriendChainSubNames[i];
384 const auto &thisFriendEntries = finfo.fNEntriesPerTreePerFriend[i];
385
386 // Build a friend chain
387 auto frChain = ROOT::Internal::TreeUtils::MakeChainForMT(thisFriendName);
388 if (thisFriendChainSubNames.empty()) {
389 // The friend is a TTree. It's safe to add to the chain the filename directly.
390 frChain->Add(thisFriendFileNames[0].c_str(), thisFriendEntries[0]);
391 } else {
392 // Otherwise, the new friend chain needs to be built using the nomenclature
393 // "filename?#treename" as argument to `TChain::Add`
394 for (std::size_t j = 0u; j < thisFriendFileNames.size(); ++j) {
395 frChain->Add((thisFriendFileNames[j] + "?#" + thisFriendChainSubNames[j]).c_str(), thisFriendEntries[j]);
396 }
397 }
398
399 const auto &treeIndex = finfo.fTreeIndexInfos[i];
400 if (treeIndex) {
401 // The call to TChain::BuildIndex does much more than just copying
402 // the indices that may have been already present in the trees of the
403 // chain. Notably, it calls `LoadTree` for every tree in the chain
404 // making sure that all branches, indices and relationships are
405 // properly set. In order to avoid unexpected behaviours, we always
406 // let the task-local friend chain rebuild its index.
407 frChain->BuildIndex(treeIndex->GetMajorName(), treeIndex->GetMinorName());
408 }
409
410 friends.emplace_back(std::move(frChain));
411 }
412
413 return friends;
414}
415
416////////////////////////////////////////////////////////////////////////////////
417/// \brief Recursively expand the glob to take care of potential wildcard
418/// specials for subdirectories in the glob.
419/// \param[in] l The list of full paths to files.
420/// \param[in] glob The glob to expand.
421/// \throws std::runtime_error If the directory parts of the glob refer to a
422/// path that cannot be opened.
423///
424/// If the glob contains a wildcard special for subdirectories, the three parts
425/// of the glob (directory, subdirectoryglob, remainder) are separated.
426/// Otherwise the glob is expanded to (directory, fileglob).
427/// The directory is first expanded via TSystem::ExpandPathName then opened via
428/// TSystem::OpenDirectory. If the directory can be opened, then current
429/// glob is used as regex expression (via TRegexp) to find subdirectories or
430/// store those files in the directory that match the regex.
431void RecursiveGlob(TList &out, const std::string &glob)
432{
433 std::string dirname;
434 std::string basename; // current glob to expand, could be a directory or file.
435 std::string remainder;
436
437 // This list of characters is currently only defined inside TString::MaybeWildcard() at
438 // https://github.com/root-project/root/blob/5df0ef8bfa3c127e554e845cd6582bc0b4d7f96a/core/base/src/TString.cxx#L960.
439 const char *wildcardSpecials = "[]*?";
440
441 const auto wildcardPos = glob.find_first_of(wildcardSpecials);
442 // Get the closest slash, to the left of the first wildcard
443 auto slashLPos = glob.rfind('/', wildcardPos);
444 // Get the closest slash, to the right of the first wildcard
445 const auto slashRPos = glob.find('/', wildcardPos);
446
447 if (slashLPos != std::string::npos) {
448 // Separate the base directory in the glob.
449 dirname = glob.substr(0, slashLPos);
450 } else {
451 // There is no directory component in the glob, use the CWD
453
454 // Set to -1 to extract the basename from the beginning of the glob string when doing +1 below.
455 slashLPos = -1;
456 }
457
458 // Seperate the subdirectory and/or file component.
459 if (slashRPos != std::string::npos) {
460 basename = glob.substr(slashLPos + 1, slashRPos - (slashLPos + 1));
461 remainder = glob.substr(slashRPos + 1);
462 } else {
463 basename = glob.substr(slashLPos + 1);
464 }
465
466 // Attempt opening of directory contained in the glob
467 const char *epath = gSystem->ExpandPathName(dirname.c_str());
468 void *dir = gSystem->OpenDirectory(epath);
469 delete[] epath;
470
471 if (dir) {
472 TRegexp re(basename.c_str(), true);
473 TString entryName;
474
475 while (const char *dirEntry = gSystem->GetDirEntry(dir)) {
476 if (!strcmp(dirEntry, ".") || !strcmp(dirEntry, ".."))
477 continue;
478 entryName = dirEntry;
479 if ((basename != dirEntry) && entryName.Index(re) == kNPOS)
480 continue;
481
482 // TODO: It might be better to use std::file_system::is_directory(),
483 // but for GCC < 9.1 this requires an extra linking flag https://en.cppreference.com/w/cpp/filesystem
484 bool isDirectory = TSystemFile().IsDirectory((dirname + '/' + dirEntry).c_str());
485 if (!remainder.empty() && isDirectory) {
486 RecursiveGlob(out, dirname + '/' + dirEntry + '/' + remainder);
487 } else if (remainder.empty() && !isDirectory) {
488 // Using '/' as separator here as it was done in TChain::Add
489 // In principle this should be using the appropriate platform separator
490 out.Add(new TObjString((dirname + '/' + dirEntry).c_str()));
491 }
492 }
493
495 } else {
496 throw std::runtime_error("ExpandGlob: could not open directory '" + dirname + "'.");
497 }
498}
499
500////////////////////////////////////////////////////////////////////////////////
501/// \brief Expands input glob into a collection of full paths to files.
502/// \param[in] glob The glob to expand.
503/// \throws std::runtime_error If the directory parts of the glob refer to a
504/// path that cannot be opened.
505/// \return A vector of strings, the fully expanded paths to the files referred
506/// to by the glob.
507///
508/// The glob is expanded recursively, but subdirectories are only expanded when
509/// it is explicitly included in the pattern. For example, "dir/*" will only
510/// list the files in the subdirectories of "dir", but "dir/*/*" will list the
511/// files in the subsubdirectories of "dir".
512std::vector<std::string> ExpandGlob(const std::string &glob)
513{
514 TList l;
515 RecursiveGlob(l, glob);
516
517 // Sort the files in alphanumeric order
518 l.Sort();
519
520 std::vector<std::string> ret;
521 ret.reserve(l.GetEntries());
522 for (const auto *tobjstr : ROOT::RangeStaticCast<const TObjString *>(l)) {
523 ret.push_back(tobjstr->GetName());
524 }
525
526 return ret;
527}
528
529////////////////////////////////////////////////////////////////////////////////
530/// \brief Returns the cluster boundaries and number of entries of the input tree.
531/// \param[in] treename Name of the tree.
532/// \param[in] filename Path to the file.
533/// \return a pair (cluster_boundaries, n_entries). The vector of cluster
534/// of cluster boundaries contains the beginning entry of the first
535/// cluster up to the ending entry of the last cluster, e.g. for a tree
536/// with 3 clusters of 10 entries each, this will return [0, 10, 20, 30]
537std::pair<std::vector<Long64_t>, Long64_t> GetClustersAndEntries(std::string_view treename, std::string_view path)
538{
539 ::TDirectory::TContext ctxt; // Avoid changing gDirectory;
540 std::unique_ptr<TFile> inFile{TFile::Open(path.data(), "READ_WITHOUT_GLOBALREGISTRATION")};
541 if (!inFile || inFile->IsZombie())
542 throw std::invalid_argument("GetClustersAndEntries: could not open file \"" + std::string(path) + "\".");
543 std::unique_ptr<TTree> tree{inFile->Get<TTree>(treename.data())};
544 if (!tree)
545 throw std::invalid_argument("GetClustersAndEntries: could not find tree \"" + std::string(treename) +
546 "\" in file \"" + std::string(path) + "\".");
547 // One TTree in one file, we can assume GetEntriesFast returns the correct number of entries
548 auto nEntries{tree->GetEntriesFast()};
549
550 auto clusterIt{tree->GetClusterIterator(0)};
551 auto clusterBegin{clusterIt()};
552 std::vector boundaries{clusterBegin};
553 while (clusterBegin < nEntries) {
554 clusterBegin = clusterIt();
555 boundaries.push_back(clusterBegin);
556 }
557
558 return std::make_pair(std::move(boundaries), std::move(nEntries));
559}
560
561////////////////////////////////////////////////////////////////////////////////
562/// \brief Check whether the input tree is using any TTreeIndex
563/// \param[in] tree The input TTree/TChain.
564/// \return A pair. The first item is a boolean telling whether the tree is using
565/// an index. The second item is a string with the name of the first
566/// friend tree found with a connected index.
567std::pair<bool, std::string> TreeUsesIndexedFriends(const TTree &tree)
568{
569 if (auto friends = tree.GetListOfFriends(); friends && friends->GetEntries() > 0) {
570 for (auto *fr : ROOT::Detail::TRangeStaticCast<TFriendElement>(friends)) {
571 auto *frTree = fr->GetTree();
572 if (frTree->GetTreeIndex())
573 return {true, frTree->GetName()};
574 }
575 }
576 return {false, ""};
577}
578
579} // namespace TreeUtils
580} // namespace Internal
581} // namespace ROOT
static void GetTopLevelBranchNamesImpl(TTree &t, std::unordered_set< std::string > &bNamesReg, std::vector< std::string > &bNames, std::unordered_set< TTree * > &analysedTrees, const std::string friendName="")
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
constexpr Ssiz_t kNPOS
Definition RtypesCore.h:117
long long Long64_t
Definition RtypesCore.h:69
char name[80]
Definition TGX11.cxx:110
@ kMustCleanup
Definition TObject.h:368
R__EXTERN TSystem * gSystem
Definition TSystem.h:561
A chain is a collection of files containing TTree objects.
Definition TChain.h:33
@ kWithoutGlobalRegistration
Definition TChain.h:71
const char * GetName() const override
Return name of this collection.
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4089
A TFriendElement TF describes a TTree object TF in a file.
virtual TTree * GetTree()
Return pointer to friend TTree.
A doubly linked list.
Definition TList.h:38
An array of TObjects.
Definition TObjArray.h:31
Int_t GetEntries() const override
Return the number of objects in array (i.e.
TObject * At(Int_t idx) const override
Definition TObjArray.h:164
Collectable string class.
Definition TObjString.h:28
Mother of all ROOT objects.
Definition TObject.h:41
R__ALWAYS_INLINE Bool_t TestBit(UInt_t f) const
Definition TObject.h:199
void ResetBit(UInt_t f)
Definition TObject.h:198
@ kMustCleanup
if object destructor must call RecursiveRemove()
Definition TObject.h:64
Regular expression class.
Definition TRegexp.h:31
Basic string class.
Definition TString.h:139
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:651
A TSystemFile describes an operating system file.
Definition TSystemFile.h:29
virtual Bool_t IsDirectory(const char *dir=nullptr) const
Check if object is a directory.
virtual Bool_t ExpandPathName(TString &path)
Expand a pathname getting rid of special shell characters like ~.
Definition TSystem.cxx:1274
virtual void FreeDirectory(void *dirp)
Free a directory.
Definition TSystem.cxx:845
virtual void * OpenDirectory(const char *name)
Open a directory. Returns 0 if directory does not exist.
Definition TSystem.cxx:836
virtual const char * GetDirEntry(void *dirp)
Get a directory entry. Returns 0 if no more entries.
Definition TSystem.cxx:853
virtual const char * UnixPathName(const char *unixpathname)
Convert from a local pathname to a Unix pathname.
Definition TSystem.cxx:1063
virtual const char * WorkingDirectory()
Return working directory.
Definition TSystem.cxx:871
A TTree represents a columnar dataset.
Definition TTree.h:79
virtual Long64_t GetEntries() const
Definition TTree.h:463
virtual TObjArray * GetListOfBranches()
Definition TTree.h:528
virtual TList * GetListOfFriends() const
Definition TTree.h:530
static constexpr Long64_t kMaxEntries
Definition TTree.h:229
Abstract interface for Tree Index.
Different standalone functions to work with trees and tuples, not required to be a member of any cla...
std::vector< std::string > GetTreeFullPaths(const TTree &tree)
std::vector< std::string > GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
std::unique_ptr< TChain > MakeChainForMT(const std::string &name="", const std::string &title="")
Create a TChain object with options that avoid common causes of thread contention.
std::pair< bool, std::string > TreeUsesIndexedFriends(const TTree &tree)
Check whether the input tree is using any TTreeIndex.
std::vector< std::unique_ptr< TChain > > MakeFriends(const ROOT::TreeUtils::RFriendInfo &finfo)
Create friends from the main TTree.
void RecursiveGlob(TList &out, const std::string &glob)
Recursively expand the glob to take care of potential wildcard specials for subdirectories in the glo...
ROOT::TreeUtils::RFriendInfo GetFriendInfo(const TTree &tree, bool retrieveEntries=false)
std::vector< std::string > ExpandGlob(const std::string &glob)
Expands input glob into a collection of full paths to files.
std::pair< std::vector< Long64_t >, Long64_t > GetClustersAndEntries(std::string_view treename, std::string_view path)
Returns the cluster boundaries and number of entries of the input tree.
void ClearMustCleanupBits(TObjArray &arr)
Reset the kMustCleanup bit of a TObjArray of TBranch objects (e.g.
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Information about friend trees of a certain TTree or TChain object.
std::vector< std::pair< std::string, std::string > > fFriendNames
Pairs of names and aliases of each friend tree/chain.
std::vector< std::vector< std::string > > fFriendChainSubNames
Names of the subtrees of a friend TChain.
std::vector< std::unique_ptr< TVirtualIndex > > fTreeIndexInfos
Information on the friend's TTreeIndexes.
std::vector< std::vector< std::string > > fFriendFileNames
Names of the files where each friend is stored.
std::vector< std::vector< Long64_t > > fNEntriesPerTreePerFriend
Number of entries contained in each tree of each friend.
TLine l
Definition textangle.C:4