Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
hadd.cxx
Go to the documentation of this file.
1/**
2 \file hadd.cxx
3 \brief This program will add histograms (see note) and Trees from a list of root files and write them to a target root file.
4 The target file is newly created and must not be
5 identical to one of the source files.
6
7 Syntax:
8 ```{.cpp}
9 hadd targetfile source1 source2 ...
10 ```
11 or
12 ```{.cpp}
13 hadd -f targetfile source1 source2 ...
14 ```
15 (targetfile is overwritten if it exists)
16
17 \param -a Append to the output
18 \param -f Force overwriting of output file.
19 \param -f[0-9] Set target compression level. 0 = uncompressed, 9 = highly compressed. Default is 1 (kDefaultZLIB).
20 You can also specify the full compression algorithm, e.g. -f206
21 \param -fk Sets the target file to contain the baskets with the same compression
22 as the input files (unless -O is specified). Compresses the meta data
23 using the compression level specified in the first input or the
24 compression setting after fk (for example 206 when using -fk206)
25 \param -ff The compression level used is the one specified in the first input
26 \param -k Skip corrupt or non-existent files, do not exit
27 \param -O Re-optimize basket size when merging TTree
28 \param -T Do not merge Trees
29 \param -v Explicitly set the verbosity level: 0 requests no output, 99 is the default
30 \param -j Parallelise the execution in `J` processes. If the number of processes is not specified, use the system maximum.
31 \param -dbg Enable verbosity. If -j was specified, do not delete partial files stored inside working directory.
32 \param -d Carry out the partial multiprocess execution in the specified directory
33 \param -n Open at most `N` files at once (use 0 to request to use the system maximum)
34 \param -cachesize Resize the prefetching cache used to speed up I/O operations (use 0 to disable).
35 \param -experimental-io-features `<feature>` Enables the corresponding experimental feature for output trees. \see ROOT::Experimental::EIOFeatures
36 \return hadd returns a status code: 0 if OK, non-zero otherwise (1 on error, 2 if the target file cannot be opened for update with -a)
37
38 For example assume 3 files f1, f2, f3 containing histograms hn and Trees Tn
39 - f1 with h1 h2 h3 T1
40 - f2 with h1 h4 T1 T2
41 - f3 with h5
42 the result of
43 ```
44 hadd -f x.root f1.root f2.root f3.root
45 ```
46 will be a file x.root with h1 h2 h3 h4 h5 T1 T2
47 where
48 - h1 will be the sum of the 2 histograms in f1 and f2
49 - T1 will be the merge of the Trees in f1 and f2
50
51 The files may contain sub-directories.
52
53 If the source files contain histograms and Trees, one can skip
54 the Trees with
55 ```
56 hadd -T targetfile source1 source2 ...
57 ```
58
59 Wildcarding and indirect files are also supported
60 ```
61 hadd result.root myfil*.root
62 ```
63 will merge all files in myfil*.root
64 ```
65 hadd result.root file1.root @list.txt file2.root myfil*.root
66 ```
67 will merge file1.root, file2.root, all files in myfil*.root
68 and all files in the indirect text file list.txt ("@" as the first
69 character of the file indicates an indirect file. An indirect file
70 is a text file containing a list of other files, including other
71 indirect files, one line per file).
72
73 If the source and target compression levels are identical (default),
74 the program uses the TChain::Merge function with option "fast", ie
75 the merge will be done without unzipping or unstreaming the baskets
76 (i.e. direct copy of the raw byte on disk). The "fast" mode is typically
77 5 times faster than the mode unzipping and unstreaming the baskets.
78
79 If the option -cachesize is used, hadd will resize (or disable if 0) the
80 prefetching cache used to speed up I/O operations.
81
82 For options that take a size as argument, a decimal number of bytes is expected.
83 If the number ends with a `k`, `m`, `g`, etc., the number is multiplied
84 by 1000 (1K), 1000000 (1MB), 1000000000 (1G), etc.
85 If this prefix is followed by `i`, the number is multiplied by the traditional
86 1024 (1KiB), 1048576 (1MiB), 1073741824 (1GiB), etc.
87 The prefix can be optionally followed by B whose casing is ignored,
88 eg. 1k, 1K, 1Kb and 1KB are the same.
89
90 \note By default histograms are added. However hadd does not support the case where
91 histograms have their bit TH1::kIsAverage set.
92
93 \authors Rene Brun, Dirk Geppert, Sven A. Schmidt, Toby Burnett
94*/
95#include "Compression.h"
96#include <ROOT/RConfig.hxx>
97#include "ROOT/TIOFeatures.hxx"
98#include "TFile.h"
99#include "THashList.h"
100#include "TKey.h"
101#include "TClass.h"
102#include "TSystem.h"
103#include "TUUID.h"
104#include "ROOT/StringConv.hxx"
105#include "snprintf.h"
106
107#include <string>
108#include <iostream>
109#include <fstream>
110#include <cstdlib>
111#include <climits>
112#include <sstream>
113#include "haddCommandLineOptionsHelp.h"
114
115#include "TFileMerger.h"
116#ifndef R__WIN32
118#endif
119
120////////////////////////////////////////////////////////////////////////////////
121
122int main( int argc, char **argv )
123{
124 if ( argc < 3 || "-h" == std::string(argv[1]) || "--help" == std::string(argv[1]) ) {
126 return (argc == 2 && ("-h" == std::string(argv[1]) || "--help" == std::string(argv[1]))) ? 0 : 1;
127 }
128
130 Bool_t append = kFALSE;
140 Int_t verbosity = 99;
141 TString cacheSize;
142 SysInfo_t s;
143 gSystem->GetSysInfo(&s);
144 auto nProcesses = s.fCpus;
146 int outputPlace = 0;
147 int ffirst = 2;
148 Int_t newcomp = -1;
149 for( int a = 1; a < argc; ++a ) {
150 if ( strcmp(argv[a],"-T") == 0 ) {
151 noTrees = kTRUE;
152 ++ffirst;
153 } else if ( strcmp(argv[a],"-a") == 0 ) {
154 append = kTRUE;
155 ++ffirst;
156 } else if ( strcmp(argv[a],"-f") == 0 ) {
157 force = kTRUE;
158 ++ffirst;
159 } else if ( strcmp(argv[a],"-k") == 0 ) {
161 ++ffirst;
162 } else if ( strcmp(argv[a],"-O") == 0 ) {
164 ++ffirst;
165 } else if (strcmp(argv[a], "-dbg") == 0) {
166 debug = kTRUE;
168 ++ffirst;
169 } else if (strcmp(argv[a], "-d") == 0) {
170 if (a + 1 != argc && argv[a + 1][0] != '-') {
171 if (gSystem->AccessPathName(argv[a + 1])) {
172 std::cerr << "Error: could not access the directory specified: " << argv[a + 1]
173 << ". We will use the system's temporal directory.\n";
174 } else {
175 workingDir = argv[a + 1];
176 }
177 ++a;
178 ++ffirst;
179 } else {
180 std::cout << "-d: no directory specified. We will use the system's temporal directory.\n";
181 }
182 ++ffirst;
183 } else if (strcmp(argv[a], "-j") == 0) {
184 // If the number of processes is not specified, use the default.
185 if (a + 1 != argc && argv[a + 1][0] != '-') {
186 // number of processes specified
187 Long_t request = 1;
188 for (char *c = argv[a + 1]; *c != '\0'; ++c) {
189 if (!isdigit(*c)) {
190 // Wrong number of Processes. Use the default:
191 std::cerr << "Error: could not parse the number of processes to run in parallel passed after -j: "
192 << argv[a + 1] << ". We will use the system maximum.\n";
193 request = 0;
194 break;
195 }
196 }
197 if (request == 1) {
198 request = strtol(argv[a + 1], 0, 10);
200 nProcesses = (Int_t)request;
201 ++a;
202 ++ffirst;
203 std::cout << "Parallelizing with " << nProcesses << " processes.\n";
204 } else {
205 std::cerr << "Error: could not parse the number of processes to use passed after -j: " << argv[a + 1]
206 << ". We will use the default value (number of logical cores).\n";
207 }
208 }
209 }
211 ++ffirst;
212 } else if ( strcmp(argv[a],"-cachesize=") == 0 ) {
213 int size;
214 static const size_t arglen = strlen("-cachesize=");
217 std::cerr << "Error: could not parse the cache size passed after -cachesize: "
218 << argv[a + 1] << ". We will use the default value.\n";
220 double m;
221 const char *munit = nullptr;
223 std::cerr << "Error: the cache size passed after -cachesize is too large: "
224 << argv[a + 1] << " is greater than " << m << munit
225 << ". We will use the default value.\n";
226 } else {
227 cacheSize = "cachesize=";
228 cacheSize.Append(argv[a]+1);
229 }
230 ++ffirst;
231 } else if ( strcmp(argv[a],"-cachesize") == 0 ) {
232 if (a+1 >= argc) {
233 std::cerr << "Error: no cache size number was provided after -cachesize.\n";
234 } else {
235 int size;
238 std::cerr << "Error: could not parse the cache size passed after -cachesize: "
239 << argv[a + 1] << ". We will use the default value.\n";
241 double m;
242 const char *munit = nullptr;
244 std::cerr << "Error: the cache size passed after -cachesize is too large: "
245 << argv[a + 1] << " is greater than " << m << munit
246 << ". We will use the default value.\n";
247 ++a;
248 ++ffirst;
249 } else {
250 cacheSize = "cachesize=";
251 cacheSize.Append(argv[a+1]);
252 ++a;
253 ++ffirst;
254 }
255 }
256 ++ffirst;
257 } else if (!strcmp(argv[a], "-experimental-io-features")) {
258 if (a+1 >= argc) {
259 std::cerr << "Error: no IO feature was specified after -experimental-io-features; ignoring\n";
260 } else {
261 std::stringstream ss;
262 ss.str(argv[++a]);
263 ++ffirst;
264 std::string item;
265 while (std::getline(ss, item, ',')) {
266 if (!features.Set(item)) {
267 std::cerr << "Ignoring unknown feature request: " << item << std::endl;
268 }
269 }
270 }
271 ++ffirst;
272 } else if ( strcmp(argv[a],"-n") == 0 ) {
273 if (a+1 >= argc) {
274 std::cerr << "Error: no maximum number of opened was provided after -n.\n";
275 } else {
276 Long_t request = strtol(argv[a+1], 0, 10);
278 maxopenedfiles = (Int_t)request;
279 ++a;
280 ++ffirst;
281 } else {
282 std::cerr << "Error: could not parse the max number of opened file passed after -n: " << argv[a+1] << ". We will use the system maximum.\n";
283 }
284 }
285 ++ffirst;
286 } else if ( strcmp(argv[a],"-v") == 0 ) {
287 if (a+1 == argc || argv[a+1][0] == '-') {
288 // Verbosity level was not specified use the default:
289 verbosity = 99;
290// if (a+1 >= argc) {
291// std::cerr << "Error: no verbosity level was provided after -v.\n";
292 } else {
294 for (char *c = argv[a+1]; *c != '\0'; ++c) {
295 if (!isdigit(*c)) {
296 // Verbosity level was not specified use the default:
298 break;
299 }
300 }
301 if (hasFollowupNumber) {
302 Long_t request = strtol(argv[a+1], 0, 10);
304 verbosity = (Int_t)request;
305 ++a;
306 ++ffirst;
307 } else {
308 verbosity = 99;
309 std::cerr << "Error: could not parse the verbosity level passed after -v: " << argv[a+1] << ". We will use the default value (99).\n";
310 }
311 }
312 }
313 ++ffirst;
314 } else if ( argv[a][0] == '-' ) {
315 bool farg = false;
316 if (force && argv[a][1] == 'f') {
317 // Bad argument
318 std::cerr << "Error: Using option " << argv[a] << " more than once is not supported.\n";
319 ++ffirst;
320 farg = true;
321 }
322 const char *prefix = "";
323 if (argv[a][1] == 'f' && argv[a][2] == 'k') {
324 farg = true;
325 force = kTRUE;
327 prefix = "k";
328 }
329 if (argv[a][1] == 'f' && argv[a][2] == 'f') {
330 farg = true;
331 force = kTRUE;
333 if (argv[a][3] != '\0') {
334 std::cerr << "Error: option -ff should not have any suffix: " << argv[a] << " (suffix has been ignored)\n";
335 }
336 }
337 char ft[7];
338 for (int alg = 0; !useFirstInputCompression && alg <= 5; ++alg) {
339 for( int j=0; j<=9; ++j ) {
340 const int comp = (alg*100)+j;
341 snprintf(ft,7,"-f%s%d",prefix,comp);
342 if (!strcmp(argv[a],ft)) {
343 farg = true;
344 force = kTRUE;
345 newcomp = comp;
346 break;
347 }
348 }
349 }
350 if (!farg) {
351 // Bad argument
352 std::cerr << "Error: option " << argv[a] << " is not a supported option.\n";
353 }
354 ++ffirst;
355 } else if (!outputPlace) {
356 outputPlace = a;
357 }
358 }
359
360 gSystem->Load("libTreePlayer");
361
362 const char *targetname = 0;
363 if (outputPlace) {
365 } else {
367 }
368
369 if (verbosity > 1) {
370 std::cout << "hadd Target file: " << targetname << std::endl;
371 }
372
374 fileMerger.SetMsgPrefix("hadd");
375 fileMerger.SetPrintLevel(verbosity - 1);
376 if (maxopenedfiles > 0) {
377 fileMerger.SetMaxOpenedFiles(maxopenedfiles);
378 }
379 // The following section will collect all input filenames into a vector,
380 // including those listed within an indirect file.
381 // If any file can not be accessed, it will error out, unless skip_errors is true
382 std::vector<std::string> allSubfiles;
383 for (int a = ffirst; a < argc; ++a) {
384 if (a == outputPlace)
385 continue;
386 if (argv[a] && argv[a][0] == '@') {
387 std::ifstream indirect_file(argv[a] + 1);
388 if (!indirect_file.is_open()) {
389 std::cerr << "hadd could not open indirect file " << (argv[a] + 1) << std::endl;
390 if (!skip_errors)
391 return 1;
392 } else {
393 std::string line;
394 while (indirect_file) {
395 if( std::getline(indirect_file, line) && line.length() ) {
396 if (gSystem->AccessPathName(line.c_str(), kReadPermission) == kTRUE) {
397 std::cerr << "hadd could not validate the file name \"" << line << "\" within indirect file "
398 << (argv[a] + 1) << std::endl;
399 if (!skip_errors)
400 return 1;
401 } else
402 allSubfiles.emplace_back(line);
403 }
404 }
405 }
406 } else {
407 const std::string line = argv[a];
408 if (gSystem->AccessPathName(line.c_str(), kReadPermission) == kTRUE) {
409 std::cerr << "hadd could not validate argument \"" << line << "\" as input file " << std::endl;
410 if (!skip_errors)
411 return 1;
412 } else
413 allSubfiles.emplace_back(line);
414 }
415 }
416 if (allSubfiles.empty()) {
417 std::cerr << "hadd could not find any valid input file " << std::endl;
418 return 1;
419 }
420 // The next snippet determines the output compression if unset
421 if (newcomp == -1) {
423 // grab from the first file.
424 TFile *firstInput = TFile::Open(allSubfiles.front().c_str());
425 if (firstInput && !firstInput->IsZombie())
426 newcomp = firstInput->GetCompressionSettings();
427 else
429 delete firstInput;
430 fileMerger.SetMergeOptions(TString("first_source_compression"));
431 } else {
433 fileMerger.SetMergeOptions(TString("default_compression"));
434 }
435 }
436 if (verbosity > 1) {
438 std::cout << "hadd compression setting for meta data: " << newcomp << '\n';
439 else
440 std::cout << "hadd compression setting for all output: " << newcomp << '\n';
441 }
442 if (append) {
443 if (!fileMerger.OutputFile(targetname, "UPDATE", newcomp)) {
444 std::cerr << "hadd error opening target file for update :" << argv[ffirst-1] << "." << std::endl;
445 exit(2);
446 }
447 } else if (!fileMerger.OutputFile(targetname, force, newcomp)) {
448 std::cerr << "hadd error opening target file (does " << argv[ffirst-1] << " exist?)." << std::endl;
449 if (!force) std::cerr << "Pass \"-f\" argument to force re-creation of output file." << std::endl;
450 exit(1);
451 }
452
453 auto step = (allSubfiles.size() + nProcesses - 1) / nProcesses;
454 if (multiproc && step < 3) {
455 // At least 3 files per process
456 step = 3;
457 nProcesses = (allSubfiles.size() + step - 1) / step;
458 std::cout << "Each process should handle at least 3 files for efficiency.";
459 std::cout << " Setting the number of processes to: " << nProcesses << std::endl;
460 }
461 if (nProcesses == 1)
463
464 std::vector<std::string> partialFiles;
465
466#ifndef R__WIN32
467 // this is commented out only to try to prevent false positive detection
468 // from several anti-virus engines on Windows, and multiproc is not
469 // supported on Windows anyway
470 if (multiproc) {
471 auto uuid = TUUID();
472 auto partialTail = uuid.AsString();
473 for (auto i = 0; (i * step) < allSubfiles.size(); i++) {
474 std::stringstream buffer;
475 buffer << workingDir << "/partial" << i << "_" << partialTail << ".root";
476 partialFiles.emplace_back(buffer.str());
477 }
478 }
479#endif
480
// mergeFiles: applies the command-line derived settings to `merger` and runs
// the merge. Captures by reference: reoptimize, keepCompressionAsIs, noTrees,
// cacheSize, features and append.
// \param merger the TFileMerger to configure and run
// \return the Bool_t returned by PartialMerge() (append mode) or Merge()
481 auto mergeFiles = [&](TFileMerger &merger) {
482 if (reoptimize) {
// Re-optimization requested: turn the merger's fast method off.
483 merger.SetFastMethod(kFALSE);
484 } else {
// Only inform the user when the merger reports a compression change and
// the "keep compression as is" flag is not set.
485 if (!keepCompressionAsIs && merger.HasCompressionChange()) {
486 // Don't warn if the user explicitly requested re-optimization.
487 std::cout << "hadd Sources and Target have different compression settings\n";
488 std::cout << "hadd merging will be slower" << std::endl;
489 }
490 }
491 merger.SetNotrees(noTrees);
// Keep whatever merge options are already set and append the cache size
// option string (cacheSize may be empty, leaving only a trailing space).
492 merger.SetMergeOptions(TString(merger.GetMergeOptions()) + " " + cacheSize);
493 merger.SetIOFeatures(features);
494 Bool_t status;
// In append mode do an incremental partial merge of all object types so the
// inputs are merged with the existing content of the output file; otherwise
// run a regular merge.
495 if (append)
496 status = merger.PartialMerge(TFileMerger::kIncremental | TFileMerger::kAll);
497 else
498 status = merger.Merge();
499 return status;
500 };
501
// sequentialMerge: adds the input files with indices [start, start + nFiles)
// (clamped to the size of allSubfiles) to `merger`, then delegates to
// mergeFiles(merger).
// \param merger the TFileMerger receiving the input files
// \param start index in allSubfiles of the first file to add
// \param nFiles maximum number of files to add starting at `start`
// \return kFALSE as soon as AddFile() fails while skip_errors is false;
// otherwise the result of mergeFiles(merger)
502 auto sequentialMerge = [&](TFileMerger &merger, int start, int nFiles) {
// The second condition guards the last chunk, which may hold fewer than
// nFiles entries.
503 for (auto i = start; i < (start + nFiles) && i < static_cast<int>(allSubfiles.size()); i++) {
504 if (!merger.AddFile(allSubfiles[i].c_str())) {
505 if (skip_errors) {
// With skip_errors set, a file that cannot be added is reported and skipped.
506 std::cerr << "hadd skipping file with error: " << allSubfiles[i] << std::endl;
507 } else {
// Without skip_errors, the first failing file aborts before any merging.
508 std::cerr << "hadd exiting due to error in " << allSubfiles[i] << std::endl;
509 return kFALSE;
510 }
511 }
512 }
513 return mergeFiles(merger);
514 };
515
// parallelMerge: merges one chunk of `step` input files, beginning at index
// `start`, into the partial output file partialFiles[start / step].
// \param start index in allSubfiles of the first file of this chunk
// \return the result of sequentialMerge() for the chunk; the process exits
// with status 1 if the partial output file cannot be opened
// NOTE(review): the declaration of mergerP is not visible in this listing
// (listing line 517 is absent); presumably a local TFileMerger - confirm.
516 auto parallelMerge = [&](int start) {
518 mergerP.SetMsgPrefix("hadd");
519 mergerP.SetPrintLevel(verbosity - 1);
520 if (maxopenedfiles > 0) {
// Divide the requested open-file limit between the processes (integer division).
521 mergerP.SetMaxOpenedFiles(maxopenedfiles / nProcesses);
522 }
// start / step is the chunk index, which selects the matching partial file.
// NOTE(review): the other OutputFile calls in this file pass three arguments
// (name, mode or force flag, newcomp); here newcomp is the second argument -
// confirm which overload this selects and that the compression setting is
// really applied to the partial file.
523 if (!mergerP.OutputFile(partialFiles[start / step].c_str(), newcomp)) {
524 std::cerr << "hadd error opening target partial file" << std::endl;
525 exit(1);
526 }
527 return sequentialMerge(mergerP, start, step);
528 };
529
// reductionFunc: adds every file listed in partialFiles to the main
// fileMerger and runs mergeFiles(fileMerger).
// \return the result of mergeFiles(fileMerger)
530 auto reductionFunc = [&]() {
531 for (const auto &pf : partialFiles) {
// The return value of AddFile() is not checked here.
532 fileMerger.AddFile(pf.c_str());
533 }
534 return mergeFiles(fileMerger);
535 };
536
537 Bool_t status;
538
539#ifndef R__WIN32
540 if (multiproc) {
542 auto res = p.Map(parallelMerge, ROOT::TSeqI(0, allSubfiles.size(), step));
543 status = std::accumulate(res.begin(), res.end(), 0U) == partialFiles.size();
544 if (status) {
545 status = reductionFunc();
546 } else {
547 std::cout << "hadd failed at the parallel stage" << std::endl;
548 }
549 if (!debug) {
550 for (const auto &pf : partialFiles) {
551 gSystem->Unlink(pf.c_str());
552 }
553 }
554 } else {
555 status = sequentialMerge(fileMerger, 0, allSubfiles.size());
556 }
557#else
558 status = sequentialMerge(fileMerger, 0, allSubfiles.size());
559#endif
560
561 if (status) {
562 if (verbosity == 1) {
563 std::cout << "hadd merged " << allSubfiles.size() << " (" << fileMerger.GetMergeList()->GetEntries()
564 << ") input (partial) files into " << targetname << ".\n";
565 }
566 return 0;
567 } else {
568 if (verbosity == 1) {
569 std::cout << "hadd failure during the merge of " << allSubfiles.size() << " ("
570 << fileMerger.GetMergeList()->GetEntries() << ") input (partial) files into " << targetname << ".\n";
571 }
572 return 1;
573 }
574}
int main()
Definition Prototype.cxx:12
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
bool Bool_t
Definition RtypesCore.h:63
int Int_t
Definition RtypesCore.h:45
long Long_t
Definition RtypesCore.h:54
constexpr Bool_t kFALSE
Definition RtypesCore.h:94
constexpr Bool_t kTRUE
Definition RtypesCore.h:93
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
winID h TVirtualViewer3D TVirtualGLPainter p
@ kReadPermission
Definition TSystem.h:55
R__EXTERN TSystem * gSystem
Definition TSystem.h:572
#define snprintf
Definition civetweb.c:1540
TIOFeatures provides the end-user with the ability to change the IO behavior of data written via a TT...
This class provides a simple interface to execute the same task multiple times in parallel,...
This class provides file copy and merging services.
Definition TFileMerger.h:30
@ kAll
Merge all type of objects (default)
Definition TFileMerger.h:78
@ kIncremental
Merge the input file with the content of the output file (if already existing).
Definition TFileMerger.h:73
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4094
Basic string class.
Definition TString.h:139
TString & Append(const char *cs)
Definition TString.h:572
virtual int GetSysInfo(SysInfo_t *info) const
Returns static system info, like OS type, CPU type, number of CPUs RAM size, etc into the SysInfo_t s...
Definition TSystem.cxx:2458
virtual int Load(const char *module, const char *entry="", Bool_t system=kFALSE)
Load a shared library.
Definition TSystem.cxx:1857
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition TSystem.cxx:1296
virtual int Unlink(const char *name)
Unlink, i.e.
Definition TSystem.cxx:1381
virtual const char * TempDirectory() const
Return a user configured or systemwide directory to create temporary files in.
Definition TSystem.cxx:1482
This class defines a UUID (Universally Unique IDentifier), also known as GUIDs (Globally Unique IDent...
Definition TUUID.h:42
TLine * line
static constexpr const char kCommandLineOptionsHelp[]
void ToHumanReadableSize(value_type bytes, Bool_t si, Double_t *coeff, const char **units)
Return the size expressed in 'human readable' format.
EFromHumanReadableSize FromHumanReadableSize(std::string_view str, T &value)
Convert strings like the following into byte counts 5MB, 5 MB, 5M, 3.7GB, 123b, 456kB,...
@ kUseCompiledDefault
Use the compile-time default setting.
Definition Compression.h:53
Int_t fCpus
Definition TSystem.h:162
TMarker m
Definition textangle.C:8