Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
rootcp.cxx
Go to the documentation of this file.
1/// \file rootcp.cxx
2///
3/// Command line tool to copy objects from ROOT files to others
4///
5/// \author Giacomo Parolini <giacomo.parolini@cern.ch>
6/// \date 2025-10-09
7#include <ROOT/RLogger.hxx>
8
9#include "logging.hxx"
10#include "optparse.hxx"
11#include "RootObjTree.hxx"
12#include "RootObjTree.cxx"
13
14#include <TClass.h>
15#include <TError.h>
16#include <TFile.h>
17#include <TROOT.h>
18#include <TSystem.h>
19#include <TTree.h>
20
21#include <algorithm>
22#include <iostream>
23#include <memory>
24#include <string_view>
25#include <vector>
26
27using namespace ROOT::CmdLine;
28
/// One-line usage synopsis. main() writes it to std::cerr whenever usage output is requested.
29static const char *const kShortHelp = "usage: rootcp [-h] [-c COMPRESS] [--recreate] [-r|--recursive] [--replace] "
30 "[-v|--verbose] SOURCE [SOURCE ...] DEST\n";
/// Extended help text (argument and option descriptions plus usage examples).
/// main() writes it to std::cerr right after kShortHelp when the long usage is requested.
31static const char *const kLongHelp = R"(
32Copy objects from ROOT files into another
33
34positional arguments:
35 SOURCE Source file(s)
36 DEST Destination file
37
38options:
39 -h, --help show this help message and exit
40 -c, --compress COMPRESS
41 change the compression settings of the destination file (if not already
42 existing).
43 --recreate recreate the destination file.
44 -r, --recursive recurse inside directories
45 --replace replace object if already existing
46 -v be verbose
47 -vv be even more verbose
48
49Note: If an object has been written to a file multiple times, rootcp will copy only the latest version of that object.
50
51Source and destination files accept the syntax: `protocol://path/to/file.root:path/to/object*` to select specific
52subobjects or directories in the file.
53
54Examples:
55- rootcp source.root dest.root
56 Copy the latest version of each object in 'source.root' to 'dest.root'.
57
58- rootcp source.root:hist* dest.root
59 Copy all histograms whose names start with 'hist' from 'source.root' to 'dest.root'.
60
61- rootcp source1.root:hist1 source2.root:hist2 dest.root
62 Copy histograms 'hist1' from 'source1.root' and 'hist2' from 'source2.root' to 'dest.root'.
63
64- rootcp --recreate source.root:hist dest.root
65 Recreate 'dest.root' and copy the histogram named 'hist' from 'source.root' into it.
66
67- rootcp -c 101 source.root:hist dest.root
68 Change compression, if not existing, of 'dest.root' to ZLIB algorithm with compression level 1 and copy the histogram named 'hist' from 'source.root' into it.
69 Meaning of the '-c' argument is given by 'compress = 100 * algorithm + level'.
70 Other examples of usage:
71 * -c 509 : ZSTD with compression level 9
72 * -c 404 : LZ4 with compression level 4
73 * -c 207 : LZMA with compression level 7
74 For more information see https://root.cern.ch/doc/latest-stable/classTFile.html#ad0377adf2f3d88da1a1f77256a140d60
75 and https://root.cern.ch/doc/latest-stable/structROOT_1_1RCompressionSetting.html
76)";
77
/// Options of a rootcp invocation, as filled in by ParseArgs() and consumed by main() and CopyNode().
78struct RootCpArgs {
// Selects which usage text, if any, main() prints before exiting.
// NOTE(review): the enumerators are not visible in this listing; main() refers to `kNo` and `kLong` -- confirm the
// full list against the original file.
79 enum class EPrintUsage {
83 };
// NOTE(review): main() reads a member `fPrintHelp` of type EPrintUsage; its declaration is not visible in this
// listing -- confirm against the original file.
// Value given to -c/--compress; left empty when ParseArgs() obtains no value for that flag.
85 std::optional<int> fCompression = std::nullopt;
// Set from the --recreate switch.
86 bool fRecreate = false;
// Set from the --replace switch.
87 bool fReplace = false;
// Set from the -r/--recursive switch.
88 bool fRecursive = false;
// Positional arguments as returned by the option parser; main() treats the last entry as the destination
// and removes it from this list.
89 std::vector<std::string> fSources;
90};
91
/// Parses rootcp's command line into a RootCpArgs.
///
/// \param args  Arguments to parse; forwarded unchanged to RCmdLineOpts::Parse (main() passes `argv + 1`).
/// \param nArgs Number of entries in `args` (main() passes `argc - 1`).
/// \return The parsed options. On parse errors, or when --help is given, the function returns early, before the
///         remaining options are read.
///
/// NOTE(review): several statements of this function are not visible in this listing (marked below); they
/// presumably set `outArgs.fPrintHelp` and the log verbosity -- confirm against the original file.
92static RootCpArgs ParseArgs(const char **args, int nArgs)
93{
95
96 RootCpArgs outArgs;
97
// Declare the flags rootcp understands: only -c/--compress takes an argument, the others are switches.
98 RCmdLineOpts opts;
99 opts.AddFlag({"-c", "--compress"}, RCmdLineOpts::EFlagType::kWithArg);
100 opts.AddFlag({"--recreate"});
101 opts.AddFlag({"--replace"});
102 opts.AddFlag({"-r", "--recursive"});
103 opts.AddFlag({"-h", "--help"});
104 opts.AddFlag({"-v"});
105 opts.AddFlag({"-vv"});
106
107 opts.Parse(args, nArgs);
108
// Report every parsing error on std::cerr, then bail out if there was at least one.
109 for (const auto &err : opts.GetErrors()) {
110 std::cerr << err << "\n";
111 }
112 if (!opts.GetErrors().empty()) {
// NOTE(review): a statement is missing from this listing here (presumably requests usage output) -- confirm.
114 return outArgs;
115 }
116
117 if (opts.GetSwitch("help")) {
// NOTE(review): a statement is missing from this listing here (presumably requests the long help) -- confirm.
119 return outArgs;
120 }
121
// Only overwrite fCompression when a value could be obtained for --compress.
122 if (auto val = opts.GetFlagValueAs<int>("compress"); val)
123 outArgs.fCompression = val;
124 outArgs.fRecursive = opts.GetSwitch("recursive");
125 outArgs.fReplace = opts.GetSwitch("replace");
126 outArgs.fRecreate = opts.GetSwitch("recreate");
127
// -vv is tested before -v.
// NOTE(review): the bodies of both branches are missing from this listing (presumably they set the log
// verbosity) -- confirm.
128 if (opts.GetSwitch("vv"))
130 else if (opts.GetSwitch("v"))
132
// All positional arguments: the sources followed by the destination (main() pops the last one as destination).
133 outArgs.fSources = opts.GetArgs();
// NOTE(review): the statement guarded by this check is missing from this listing (presumably it requests usage
// output when fewer than one SOURCE plus DEST are given) -- confirm.
134 if (outArgs.fSources.size() < 2)
136
137 return outArgs;
138}
139
140static std::unique_ptr<TFile> OpenFile(const char *fileName, const char *mode)
141{
142 const auto origLv = gErrorIgnoreLevel;
144 auto file = std::unique_ptr<TFile>(TFile::Open(fileName, mode));
145 if (!file || file->IsZombie()) {
146 Err() << "File " << fileName << "does not exist.\n";
147 return nullptr;
148 }
149 gErrorIgnoreLevel = origLv;
150 return file;
151}
152
153namespace {
154
155struct RootCpDestination {
156 TFile *fFile;
157 std::string fPath;
158 std::string fFname;
159 bool fIsNewObject;
160};
161
162} // namespace
163
// Breaks `path` at its last '/' and returns {directory part, basename}. The directory part carries no
// trailing '/'; when `path` contains no '/' at all, the directory part is empty and the basename is `path`.
static std::pair<std::string_view, std::string_view> DecomposePath(std::string_view path)
{
   const auto sepPos = path.find_last_of('/');
   const bool hasDirectory = sepPos != std::string_view::npos;
   if (hasDirectory)
      return {path.substr(0, sepPos), path.substr(sepPos + 1)};

   return {std::string_view{}, path};
}
175
176// Copies `nodeIdx`-th node from `src`'s object tree to the file in `dest`.
177// `nodeIdx` is assumed to be in range.
178static void CopyNode(const RootSource &src, const RootCpDestination &dest, NodeIdx_t nodeIdx, const RootCpArgs &args)
179{
180 TFile *srcfile = src.fObjectTree.fFile.get();
181 // The file is guaranteed to be valid by ParseRootSource: if this crashes, it's a bug in there.
182 assert(srcfile);
183 // Similarly, nodeIdx must be in range because it always comes from a RootObjTree.
184 assert(nodeIdx < src.fObjectTree.fNodes.size());
185 const RootObjNode &node = src.fObjectTree.fNodes[nodeIdx];
186 const std::string srcFullPath = NodeFullPath(src.fObjectTree, nodeIdx, ENodeFullPathOpt::kExcludeFilename);
187 // Directory path, excluding trailing '/' and without the "file.root:" prefix.
188 const std::string_view srcDirPath =
189 (node.fParent == 0) ? std::string_view{}
190 : std::string_view{srcFullPath.data(), srcFullPath.size() - node.fName.size() - 1};
191
192 // Figure out where the output goes. If the user specified an output path (i.e. if dest.fPath is not empty), then
193 // use that. Otherwise, use the same path as the source object.
194 std::string destFullPath;
195 std::string_view destDirPath, destBaseName;
196 if (dest.fIsNewObject || dest.fPath.empty()) {
197 // User gave a destination which is not an existing directory or no destination at all
198 destFullPath = dest.fPath.empty() ? srcFullPath : dest.fPath;
199 auto decomposed = DecomposePath(destFullPath);
200 destDirPath = decomposed.first;
201 destBaseName = decomposed.second;
202 } else if (!dest.fPath.empty()) {
203 // User gave a destination which is an existing directory
204 destDirPath = dest.fPath;
205 destFullPath = std::string(destDirPath) + "/" + node.fName;
206 destBaseName = node.fName;
207 }
208
209 if (src.fFileName == dest.fFname && srcFullPath == destFullPath) {
210 Err() << src.fFileName << ":" << srcFullPath << ": source and destination cannot be the same\n";
211 return;
212 }
213
214 Info(2) << "cp " << src.fFileName << ":" << srcFullPath << " -> " << dest.fFname << ":" << destFullPath << "\n";
215
216 TDirectory *destDir = dest.fFile;
217 if (!destDirPath.empty()) {
218 Info(3) << "mkdir " << destDirPath << "\n";
219 destDir = dest.fFile->mkdir(std::string(destDirPath).c_str(), /* title = */ "",
220 /* returnPreExisting = */ true);
221 }
222
223 // Check if the destination already exists. There are 3 cases here:
224 // 1. it doesn't: just go on as normal;
225 // 2. it does and it is a directory: the copied object needs to be copied inside it, but this was already accounted
226 // for outside CopyNode, so just go on as normal;
227 // 3. it does and it's not a directory: if we have the replace flag, replace it, otherwise error out.
228 const TKey *destKey = destDir->GetKey(std::string(destBaseName).c_str());
229 if (destKey && !TClass::GetClass(destKey->GetClassName())->InheritsFrom("TDirectory") && !args.fReplace) {
230 Err() << "an object of type '" << destKey->GetClassName() << "' already exists at " << dest.fFname << ':'
231 << destFullPath << ". Use the --replace flag to overwrite existing objects.\n";
232 return;
233 }
234
235 // retrieve the object's key
236 const TDirectory *srcDir = srcfile->GetDirectory(std::string(srcDirPath).c_str(), true);
237 if (!srcDir) {
238 Err() << "failed to get source directory '" << srcDirPath << "'\n";
239 return;
240 }
241 const TKey *srcKey = srcDir->GetKey(node.fName.c_str());
242 if (!srcKey) {
243 Err() << "failed to read key of object '" << srcFullPath << "'\n";
244 return;
245 }
246
247 // Verify that the class is known and supported.
248 const std::string &className = node.fClassName;
249 const TClass *cl = TClass::GetClass(className.c_str());
250 if (!cl) {
251 Err() << "unknown object type: " << className << "; object will be skipped.\n";
252 return;
253 }
254
255 Info(3) << "read object \"" << srcFullPath << "\" of type " << node.fClassName << "\n";
256 if (!destDir) {
257 Err() << "failed to create or get destination directory \"" << dest.fFname << ":" << destDirPath << "\"\n";
258 return;
259 }
260
261 // Delete previous object if we're replacing it
262 if (destKey && args.fReplace)
263 destDir->Delete((std::string(destBaseName) + ";*").c_str());
264
265 //
266 // Do the actual copy
267 //
268 if (cl->InheritsFrom("TObject")) {
269 TObject *obj = node.fKey->ReadObj();
270 if (!obj) {
271 Err() << "failed to read object \"" << srcFullPath << "\".\n";
272 return;
273 }
274
275 if (TTree *old = dynamic_cast<TTree *>(obj)) {
276 // special case for TTree
277 TDirectory::TContext ctx(gDirectory, destDir);
278 obj = old->CloneTree(-1, "fast");
279 if (dest.fIsNewObject) {
280 static_cast<TTree *>(obj)->SetName(std::string(destBaseName).c_str());
281 }
282 obj->Write();
283 old->Delete();
284 } else if (cl->InheritsFrom("TDirectory")) {
285 // directory
286 if (!args.fRecursive) {
287 Warn() << "Directory '" << srcFullPath
288 << "' will not be copied. Use the -r option if you need a recursive copy.\n";
289 } else {
290 destDir->mkdir(node.fName.c_str(), srcKey->GetTitle(), true);
291 RootCpDestination dest2 = dest;
292 dest2.fPath = dest.fPath + (dest.fPath.empty() ? "" : "/") + node.fName;
293 for (auto childIdx = node.fFirstChild; childIdx < node.fFirstChild + node.fNChildren; ++childIdx)
294 CopyNode(src, dest2, childIdx, args);
295 }
296 } else {
297 // regular TObject
298 destDir->WriteObject(obj, std::string(destBaseName).c_str());
299 }
300 obj->Delete();
301 } else {
302 Warn() << "object '" << node.fName << "' of type '" << node.fClassName
303 << "' will not be copied, as its type is currently unsupported by rootcp.\n";
304 }
305}
306
307int main(int argc, char **argv)
308{
309 InitLog("rootcp");
310
311 // Parse arguments
312 auto args = ParseArgs(const_cast<const char **>(argv) + 1, argc - 1);
313 if (args.fPrintHelp != RootCpArgs::EPrintUsage::kNo) {
314 std::cerr << kShortHelp;
315 if (args.fPrintHelp == RootCpArgs::EPrintUsage::kLong) {
316 std::cerr << kLongHelp;
317 return 0;
318 }
319 return 1;
320 }
321
322 // Get destination. In general it may be a string like "prefix://file.root:path/to/dir", so check if it refers to
323 // a valid location.
324 // First validate the destination syntax.
325 const auto destFnameAndPattern = args.fSources.back();
326 args.fSources.pop_back();
327 auto splitRes = SplitIntoFileNameAndPattern(destFnameAndPattern);
328 if (!splitRes) {
329 Err() << splitRes.GetError()->GetReport() << "\n";
330 return 1;
331 }
332 auto [destFname, destPath] = splitRes.Unwrap();
333
334 // Validate and split all input sources into filename + pattern
335 std::vector<std::pair<std::string_view, std::string_view>> sourcesFileAndPattern;
336 sourcesFileAndPattern.reserve(args.fSources.size());
337 bool srcIsSameAsDstFile = false;
338 for (const auto &src : args.fSources) {
339 auto res = SplitIntoFileNameAndPattern(src);
340 if (!res) {
341 Err() << res.GetError()->GetReport() << "\n";
342 return 1;
343 }
344 auto fNameAndPattern = res.Unwrap();
345 if (fNameAndPattern.first == destFname) {
346 srcIsSameAsDstFile = true;
347 }
348 sourcesFileAndPattern.push_back(fNameAndPattern);
349 }
350
351 // Check if the operation is allowed.
352
353 if (args.fRecreate && srcIsSameAsDstFile) {
354 Err() << "cannot recreate destination file if this is also a source file\n";
355 return 1;
356 }
357
358 if (args.fCompression && gSystem->AccessPathName(std::string(destFname).c_str())) {
359 Err() << "can't change compression settings on existing file " << destFname << "\n";
360 return 1;
361 }
362
363 const char *destFileMode =
364 args.fRecreate ? "RECREATE_WITHOUT_GLOBALREGISTRATION" : "UPDATE_WITHOUT_GLOBALREGISTRATION";
365 auto destFile = OpenFile(std::string(destFname).c_str(), destFileMode);
366 if (!destFile)
367 return 1;
368
369 // `destPath` is the part after the colon (the input is given as `destFname:destPath`). It may be empty, but
370 // if it's not it must refer to either an existing TDirectory inside the file or to a non-existing object (it may
371 // also be an existing object if --replace was passed).
372 TKey *destDirKey = nullptr;
373 if (!destPath.empty()) {
374 destDirKey = destFile->GetKey(std::string(destPath).c_str());
375 if (destDirKey && !TClass::GetClass(destDirKey->GetClassName())->InheritsFrom("TDirectory")) {
376 if (!args.fReplace) {
377 // This error would be caught later in CopyNode, but since we can detect it early let's bail out before
378 // wasting time touching other files.
379 Err() << "destination path \"" << destFname << ":" << destPath << "\" already exists (as an object of type "
380 << destDirKey->GetClassName() << "). Use the --replace flag to overwrite it.\n";
381 return 1;
382 } else {
383 destDirKey = nullptr;
384 }
385 }
386 }
387
388 // If we are copying multiple objects the destination path must either be empty or a TDirectory.
389 // The same check is done in the loop over the sources below, but this is an early-out in case multiple
390 // files are given as input (as opposed to multiple objects in the same file, which is checked in the loop).
391 const bool destIsNewObject = !destPath.empty() && !destDirKey;
392 if (destIsNewObject && args.fSources.size() > 1) {
393 Err() << "multiple sources were specified, but destination path \"" << destFname << ":" << destPath
394 << "\" is not a directory.\n";
395 return 1;
396 }
397
398 if (args.fCompression)
399 destFile->SetCompressionSettings(*args.fCompression);
400
401 const std::uint32_t flags = args.fRecursive * EGetMatchingPathsFlags::kRecursive;
402 bool errors = false;
403 for (const auto &[srcFname, srcPattern] : sourcesFileAndPattern) {
404 auto src = ROOT::CmdLine::GetMatchingPathsInFile(srcFname, srcPattern, flags);
405 if (!src.fErrors.empty()) {
406 for (const auto &err : src.fErrors)
407 Err() << err << "\n";
408
409 errors = true;
410 break;
411 }
412
413 // We should never register files to the global list for performance reasons.
414 assert(!gROOT->GetListOfFiles()->Contains(src.fObjectTree.fFile.get()));
415
416 // If we are copying multiple objects the destination path must either be empty or a TDirectory.
417 if (destIsNewObject && src.fObjectTree.fLeafList.size() + src.fObjectTree.fDirList.size() > 1) {
418 Err() << "multiple sources were specified but destination path \"" << destFname << ":" << destPath
419 << "\" is not a directory.\n";
420
421 errors = true;
422 break;
423 }
424
425 // Iterate all objects we need to copy
426 RootCpDestination dest;
427 dest.fFile = destFile.get();
428 dest.fFname = destFname;
429 dest.fIsNewObject = destIsNewObject;
430 dest.fPath = destPath;
431 for (auto nodeIdx : src.fObjectTree.fLeafList) {
432 CopyNode(src, dest, nodeIdx, args);
433 }
434 for (auto nodeIdx : src.fObjectTree.fDirList) {
435 if (nodeIdx == 0) {
436 // The root file node needs special treatment; for all other "top-level" directories, CopyNode handles them.
437 const auto &node = src.fObjectTree.fNodes[nodeIdx];
438 for (auto childIdx = node.fFirstChild; childIdx < node.fFirstChild + node.fNChildren; ++childIdx)
439 CopyNode(src, dest, childIdx, args);
440 } else {
441 CopyNode(src, dest, nodeIdx, args);
442 }
443 }
444 }
445
446 if (errors && !srcIsSameAsDstFile) {
447 // If the destination file was fresh, make sure we don't end up with a half-copied file in case of errors.
448 gSystem->Unlink(std::string(destFname).c_str());
449 }
450
451 return errors;
452}
#define gDirectory
Definition TDirectory.h:385
constexpr Int_t kError
Definition TError.h:47
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
Definition TError.cxx:241
extern Int_t gErrorIgnoreLevel
errors with level below this value will be ignored. Default is kUnset.
Definition TError.h:140
Double_t err
#define gROOT
Definition TROOT.h:417
extern TSystem * gSystem
Definition TSystem.h:582
std::vector< double > errors
void AddFlag(std::initializer_list< std::string_view > aliases, EFlagType type=EFlagType::kSwitch, std::string_view help="", std::uint32_t flagOpts=0)
Defines a new flag (either a switch or a flag with argument).
Definition optparse.hxx:208
const std::vector< std::string > & GetErrors() const
Returns all parsing errors.
Definition optparse.hxx:180
int GetSwitch(std::string_view name) const
If name refers to a previously-defined switch (i.e.
Definition optparse.hxx:300
const std::vector< std::string > & GetArgs() const
Retrieves all positional arguments.
Definition optparse.hxx:182
void Parse(const char **args, std::size_t nArgs)
Definition optparse.hxx:425
std::optional< T > GetFlagValueAs(std::string_view name) const
Definition optparse.hxx:356
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2994
TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE) override
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
TDirectory * GetDirectory(const char *apath, Bool_t printError=false, const char *funcname="GetDirectory") override
Find a directory named "apath".
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
void Delete(const char *namecycle="") override
Delete Objects or/and keys in a directory.
std::enable_if_t<!std::is_base_of< TObject, T >::value, Int_t > WriteObject(const T *obj, const char *name, Option_t *option="", Int_t bufsize=0)
Write an object with proper type checking.
Definition TDirectory.h:283
virtual TKey * GetKey(const char *, Short_t=9999) const
Definition TDirectory.h:222
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:130
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3788
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition TKey.h:28
const char * GetTitle() const override
Returns title (title can contain 32x32 xpm thumbnail/icon).
Definition TKey.cxx:1536
virtual const char * GetClassName() const
Definition TKey.h:77
Mother of all ROOT objects.
Definition TObject.h:42
A TTree represents a columnar dataset.
Definition TTree.h:89
void file()
Definition file.C:11
int main()
subroutine node(ivo, nuserm, iposp)
Definition g2root.f:833
gr SetName("gr")
void SetLogVerbosity(int verbosity)
Definition logging.hxx:45
std::ostream & Warn()
Definition logging.hxx:61
void InitLog(const char *name, int defaultVerbosity=1)
Definition logging.hxx:39
std::ostream & Err()
Definition logging.hxx:55
ROOT::RResult< std::pair< std::string_view, std::string_view > > SplitIntoFileNameAndPattern(std::string_view sourceRaw)
Given a string like "root://file.root:a/b/c", splits it into { "root://file.root",...
std::string NodeFullPath(const RootObjTree &tree, NodeIdx_t nodeIdx, ENodeFullPathOpt opt)
Given a node, returns its full path. If opt == kIncludeFilename, the path is prepended by "filename....
@ kRecursive
Recurse into subdirectories when matching objects.
std::uint32_t NodeIdx_t
RootSource GetMatchingPathsInFile(std::string_view fileName, std::string_view pattern, std::uint32_t flags)
Given a file and a "path pattern", returns a RootSource containing the tree of matched objects.
static const char *const kShortHelp
Command line tool to open a ROOT file on a TBrowser.
static const char *const kLongHelp
static std::unique_ptr< TFile > OpenFile(const char *fileName, const char *mode)
Definition rootcp.cxx:140
static RootCpArgs ParseArgs(const char **args, int nArgs)
Definition rootcp.cxx:92
static std::pair< std::string_view, std::string_view > DecomposePath(std::string_view path)
Definition rootcp.cxx:165
static void CopyNode(const RootSource &src, const RootCpDestination &dest, NodeIdx_t nodeIdx, const RootCpArgs &args)
Definition rootcp.cxx:178
std::unique_ptr< TFile > fFile
std::vector< RootObjNode > fNodes
bool fRecreate
Definition rootcp.cxx:86
std::optional< int > fCompression
Definition rootcp.cxx:85
bool fRecursive
Definition rootcp.cxx:88
std::vector< std::string > fSources
Definition rootcp.cxx:89
bool fReplace
Definition rootcp.cxx:87
EPrintUsage fPrintHelp
Definition rootcp.cxx:84