Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RootObjTree.cxx
Go to the documentation of this file.
1// \file RootObjTree.cxx
2///
3/// \author Giacomo Parolini <giacomo.parolini@cern.ch>
4/// \date 2025-10-14
5
6#include "RootObjTree.hxx"
7
8#include "wildcards.hpp"
9
10#include <TFile.h>
11#include <TSystem.h>
12
13#include <ROOT/StringUtils.hxx>
14
15#include <algorithm>
16#include <deque>
17#include <iostream>
18#include <set>
19
20static bool MatchesGlob(std::string_view haystack, std::string_view pattern)
21{
22 return wildcards::match(haystack, pattern);
23}
24
26ROOT::CmdLine::GetMatchingPathsInFile(std::string_view fileName, std::string_view pattern, std::uint32_t flags)
27{
29 source.fFileName = fileName;
30 auto &nodeTree = source.fObjectTree;
31 const char *fileMode = "READ_WITHOUT_GLOBALREGISTRATION";
32 const std::string fileNameStr { fileName };
33 if (flags & kOpenFilesAsWritable) {
34 // There is no way to open a file for writing only if the file already exists, so we need to manually check first.
35 if (gSystem->AccessPathName(fileNameStr.c_str(), kWritePermission)) {
36 source.fErrors.push_back("File '" + fileNameStr + "' does not exist or is not writable.");
37 return source;
38 }
39 fileMode = "UPDATE_WITHOUT_GLOBALREGISTRATION";
40 }
41 nodeTree.fFile = std::unique_ptr<TFile>(TFile::Open(fileNameStr.c_str(), fileMode));
42 if (!nodeTree.fFile || nodeTree.fFile->IsZombie()) {
43 source.fErrors.push_back("Failed to open file '" + fileNameStr + "'");
44 return source;
45 }
46
47 const auto patternSplits = pattern.empty() ? std::vector<std::string>{} : ROOT::Split(pattern, "/");
48 std::vector<bool> patternWasMatchedAtLeastOnce(patternSplits.size());
49
50 /// Match all objects at all nesting levels down to the deepest nesting level of `pattern` (or all nesting levels
51 /// if we have the "recursive listing" flag). The nodes are visited breadth-first.
52
53 // Initialize the nodeTree with the root node and mark it as the first node to visit.
54 {
55 ROOT::CmdLine::RootObjNode rootNode = {};
56 rootNode.fName = std::string(fileName);
57 rootNode.fClassName = nodeTree.fFile->Class()->GetName();
58 rootNode.fDir = nodeTree.fFile.get();
59 nodeTree.fNodes.emplace_back(std::move(rootNode));
60 }
61 std::deque<NodeIdx_t> nodesToVisit{0};
62
63 const bool isRecursive = flags & EGetMatchingPathsFlags::kRecursive;
64 do {
65 NodeIdx_t curIdx = nodesToVisit.front();
66 nodesToVisit.pop_front();
67 ROOT::CmdLine::RootObjNode *cur = &nodeTree.fNodes[curIdx];
68 assert(cur->fDir);
69
70 // Gather all keys under this directory and sort them by namecycle.
71 std::vector<TKey *> keys;
72 keys.reserve(cur->fDir->GetListOfKeys()->GetEntries());
74 keys.push_back(key);
75
76 std::sort(keys.begin(), keys.end(), [](const TKey *a, const TKey *b) {
77 int cmp = strcmp(a->GetName(), b->GetName());
78 // Note that we order by decreasing cycle, i.e. from most to least recent
79 return (cmp != 0) ? (cmp < 0) : (a->GetCycle() > b->GetCycle());
80 });
81
82 // Iterate the keys and find matches
83 for (TKey *key : keys) {
84 // NOTE: cur->fNesting can only be >= patternSplits.size() if we have `isRecursive == true` (see the code near
85 // the end of the outer do/while loop).
86 // In that case we don't care about matching patterns anymore because we are already beyond the nesting level
87 // where pattern filtering applies.
88 // In all other cases, we check if the key name matches the pattern and skip it if it doesn't.
89 if (cur->fNesting < patternSplits.size()) {
90 const auto &patternSplit = patternSplits[cur->fNesting];
91 const bool hasCycle = patternSplit.rfind(';') != std::string_view::npos;
92 const auto nameCycle =
93 std::string(key->GetName()) + (hasCycle ? ';' + std::to_string(key->GetCycle()) : "");
94 if (MatchesGlob(nameCycle, patternSplits[cur->fNesting]))
95 patternWasMatchedAtLeastOnce[cur->fNesting] = true;
96 else
97 continue;
98 }
99
100 auto &newChild = nodeTree.fNodes.emplace_back(NodeFromKey(*key));
101 // Need to get back cur since the emplace_back() may have moved it.
102 cur = &nodeTree.fNodes[curIdx];
103 newChild.fNesting = cur->fNesting + 1;
104 newChild.fParent = curIdx;
105 if (!cur->fNChildren)
106 cur->fFirstChild = nodeTree.fNodes.size() - 1;
107 cur->fNChildren++;
108
109 const auto *cl = TClass::GetClass(key->GetClassName());
110 if (cl && cl->InheritsFrom("TDirectory"))
111 newChild.fDir = cur->fDir->GetDirectory(key->GetName());
112 }
113
114 // Only recurse into subdirectories that are up to the deepest level we ask for through `pattern` (except in
115 // case of recursive listing).
116 if (cur->fNesting < patternSplits.size() || isRecursive) {
117 for (auto childIdx = cur->fFirstChild; childIdx < cur->fFirstChild + cur->fNChildren; ++childIdx) {
118 auto &child = nodeTree.fNodes[childIdx];
119 if (child.fDir)
120 nodesToVisit.push_back(childIdx);
121 else if (cur->fNesting < patternSplits.size())
122 nodeTree.fLeafList.push_back(childIdx);
123 }
124 }
125 if (cur->fNesting == patternSplits.size()) {
126 if (cur->fDir)
127 nodeTree.fDirList.push_back(curIdx);
128 else
129 nodeTree.fLeafList.push_back(curIdx);
130 }
131 } while (!nodesToVisit.empty());
132
133 if (!(flags & kIgnoreFailedMatches)) {
134 for (auto i = 0u; i < patternSplits.size(); ++i) {
135 // We don't append errors for '*' because its semantics imply "0 or more matches", so 0 matches is a valid
136 // case. For any other pattern we expect at least 1 match.
137 if (!patternWasMatchedAtLeastOnce[i] && !patternSplits[i].empty() && patternSplits[i] != "*") {
138 std::string err = "'" + std::string(fileName) + ":" +
139 ROOT::Join("/", std::span<const std::string>{patternSplits.data(), i + 1}) +
140 "' matches no objects.";
141 source.fErrors.push_back(err);
142 }
143 }
144 }
145
146 return source;
147}
148
151{
152 auto prefixIdx = sourceRaw.find("://");
153 std::string_view::size_type separatorIdx = 0;
154 if (prefixIdx != std::string_view::npos) {
155 bool prefixFound = false;
156 // Handle known URI prefixes
157 static const char *const specialPrefixes[] = {"http", "https", "root", "gs", "s3"};
158 auto prefix = sourceRaw.substr(0, prefixIdx);
159 for (std::string_view knownPrefix : specialPrefixes) {
160 if (prefix == knownPrefix) {
161 prefixFound = true;
162 break;
163 }
164 }
165 if (!prefixFound) {
166 return R__FAIL("unknown file protocol");
167 }
168 separatorIdx = sourceRaw.substr(prefixIdx + 3).find_first_of(':');
169 if (separatorIdx != std::string_view::npos)
170 separatorIdx += prefixIdx + 3;
171 } else {
172 separatorIdx = sourceRaw.find_first_of(':');
173 }
174
175 if (separatorIdx != std::string_view::npos) {
176 return {{sourceRaw.substr(0, separatorIdx), sourceRaw.substr(separatorIdx + 1)}};
177 }
178 return {{sourceRaw, std::string_view{}}};
179}
180
181ROOT::CmdLine::RootSource ROOT::CmdLine::ParseRootSource(std::string_view sourceRaw, std::uint32_t flags)
182{
184
185 auto res = SplitIntoFileNameAndPattern(sourceRaw);
186 if (!res) {
187 source.fErrors.push_back(res.GetError()->GetReport());
188 return source;
189 }
190
191 auto [fileName, tokens] = res.Unwrap();
192 source = ROOT::CmdLine::GetMatchingPathsInFile(fileName, tokens, flags);
193
194 assert(source.fErrors.empty() == !!source.fObjectTree.fFile);
195 return source;
196}
197
198std::vector<ROOT::CmdLine::RootSource>
199ROOT::CmdLine::ParseRootSources(const std::vector<std::string> &sourcesRaw, std::uint32_t flags)
200{
201 std::vector<ROOT::CmdLine::RootSource> sources;
202 sources.reserve(sourcesRaw.size());
203
204 for (const auto &srcRaw : sourcesRaw) {
205 sources.push_back(ParseRootSource(srcRaw, flags));
206 }
207
208 return sources;
209}
210
211void ROOT::CmdLine::PrintObjTree(const RootObjTree &tree, std::ostream &out)
212{
213 if (tree.fNodes.empty())
214 return;
215
216 struct RevNode {
217 std::set<NodeIdx_t> fChildren;
218 };
219 std::vector<RevNode> revNodes;
220 revNodes.resize(tree.fNodes.size());
221
222 // Un-linearize the tree
223 for (int i = (int)tree.fNodes.size() - 1; i >= 0; --i) {
224 const auto *node = &tree.fNodes[i];
225 NodeIdx_t childIdx = i;
226 NodeIdx_t parentIdx = node->fParent;
227 while (childIdx != parentIdx) {
228 auto &revNodeParent = revNodes[parentIdx];
229 revNodeParent.fChildren.insert(childIdx);
230 node = &tree.fNodes[parentIdx];
231 childIdx = parentIdx;
232 parentIdx = node->fParent;
233 }
234 }
235
236 // Print out the tree.
237 // Vector of {nesting, nodeIdx}
238 std::vector<std::pair<std::uint32_t, NodeIdx_t>> nodesToVisit = {{0, 0}};
239 while (!nodesToVisit.empty()) {
240 const auto [nesting, nodeIdx] = nodesToVisit.back();
241 nodesToVisit.pop_back();
242 const auto &cur = revNodes[nodeIdx];
243 const auto &node = tree.fNodes[nodeIdx];
244 for (auto i = 0u; i < 2 * nesting; ++i)
245 out << ' ';
246 out << node.fName << ";" << node.fCycle << " : " << node.fClassName << "\n";
247 // Add the children in reverse order to preserve alphabetical order during depth-first visit.
248 for (auto it = cur.fChildren.rbegin(); it != cur.fChildren.rend(); ++it) {
249 nodesToVisit.push_back({nesting + 1, *it});
250 }
251 }
252}
253
256{
257 const RootObjNode *node = &tree.fNodes[nodeIdx];
258 std::string fullPath = node->fName;
259 while (node->fParent != 0) {
260 node = &tree.fNodes[node->fParent];
261 fullPath = node->fName + (fullPath.empty() ? "" : "/") + fullPath;
262 }
263 if (opt == ENodeFullPathOpt::kIncludeFilename && nodeIdx > 0)
264 fullPath = tree.fNodes[0].fName + ":" + fullPath;
265 return fullPath;
266}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define b(i)
Definition RSha256.hxx:100
#define a(i)
Definition RSha256.hxx:99
static bool MatchesGlob(std::string_view haystack, std::string_view pattern)
Double_t err
@ kWritePermission
Definition TSystem.h:54
externTSystem * gSystem
Definition TSystem.h:582
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2994
virtual Int_t GetEntries() const
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
virtual TList * GetListOfKeys() const
Definition TDirectory.h:224
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3788
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition TKey.h:28
subroutine node(ivo, nuserm, iposp)
Definition g2root.f:833
RootSource ParseRootSource(std::string_view sourceRaw, std::uint32_t flags)
Given a string like "file.root:dir/obj", converts it to a RootSource.
ROOT::RResult< std::pair< std::string_view, std::string_view > > SplitIntoFileNameAndPattern(std::string_view sourceRaw)
Given a string like "root://file.root:a/b/c", splits it into { "root://file.root",...
std::string NodeFullPath(const RootObjTree &tree, NodeIdx_t nodeIdx, ENodeFullPathOpt opt)
Given a node, returns its full path. If opt == kIncludeFilename, the path is prepended by "filename....
void PrintObjTree(const RootObjTree &tree, std::ostream &out=std::cout)
Prints out the structure of the object tree. Helpful for debugging.
@ kRecursive
Recurse into subdirectories when matching objects.
std::uint32_t NodeIdx_t
std::vector< ROOT::CmdLine::RootSource > ParseRootSources(const std::vector< std::string > &sourcesRaw, std::uint32_t flags)
Given a list of strings like "file.root:dir/obj", converts each string to a RootSource.
RootObjNode NodeFromKey(TKey &key)
RootSource GetMatchingPathsInFile(std::string_view fileName, std::string_view pattern, std::uint32_t flags)
Given a file and a "path pattern", returns a RootSource containing the tree of matched objects.
TRangeCast< T, false > TRangeStaticCast
TRangeStaticCast is an adapter class that allows the typed iteration through a TCollection.
std::string Join(const std::string &sep, StringCollection_t &&strings)
Concatenate a list of strings with a separator.
std::vector< std::string > Split(std::string_view str, std::string_view delims, bool skipEmpty=false)
Splits a string at each character in delims.
constexpr full_match_result< const_iterator_t< Sequence >, const_iterator_t< Pattern > > match(Sequence &&sequence, Pattern &&pattern, const cards< container_item_t< Pattern > > &c=cards< container_item_t< Pattern > >(), const EqualTo &equal_to=EqualTo())
A representation of all objects involved in a rootls-style ROOT file listing.
std::unique_ptr< TFile > fFile
std::vector< std::string > fErrors
TTree * tree