153#include "haddCommandLineOptionsHelp.h"
154#include "logging.hxx"
215 const auto argLen = strlen(arg);
216 const auto flagLen = strlen(flagStr);
217 if (argLen == flagLen && strncmp(arg, flagStr, flagLen) == 0) {
219 Warn() <<
"duplicate flag: " << flagStr <<
"\n";
228static std::optional<IntFlag_t>
StrToUInt(
const char *str)
272 Err() <<
"error parsing integer argument '" << arg <<
"'\n";
280 std::stringstream ss;
283 while (std::getline(ss, item,
',')) {
284 if (!features.
Set(item))
285 Warn() <<
"ignoring unknown feature request: " << item <<
"\n";
296 Err() <<
"could not parse the cache size passed after -cachesize: '" << arg <<
"'\n";
300 const char *munit =
nullptr;
302 Warn() <<
"the cache size passed after -cachesize is too large: " << arg <<
" is greater than " <<
m << munit
303 <<
". We will use the maximum value.\n";
306 cacheSize =
"cachesize=";
314 if (strcmp(arg,
"SkipListed") == 0)
316 if (strcmp(arg,
"OnlyListed") == 0)
319 Err() <<
"invalid argument for -Ltype: '" << arg <<
"'. Can only be 'SkipListed' or 'OnlyListed' (case matters).\n";
328FlagArg(
int argc,
char **argv,
int &argIdxInOut,
const char *flagStr, std::optional<T> &flagOut,
331 int argIdx = argIdxInOut;
332 const char *arg = argv[argIdx] + 1;
333 int argLen = strlen(arg);
334 int flagLen = strlen(flagStr);
335 const char *nxtArg =
nullptr;
337 if (strncmp(arg, flagStr, flagLen) != 0)
340 bool argIsSeparate =
false;
341 if (argLen > flagLen) {
343 nxtArg = arg + flagLen;
345 if (nxtArg[0] ==
'=')
347 }
else if (argLen == flagLen) {
348 argIsSeparate =
true;
349 if (argIdx + 1 < argc) {
351 nxtArg = argv[argIdxInOut];
353 Err() <<
"expected argument after '-" << flagStr <<
"' flag.\n";
360 auto converted = conv(nxtArg);
362 flagOut = converted.fValue;
364 if (defaultVal && argIsSeparate) {
365 flagOut = defaultVal;
367 argIdxInOut -= (argIdxInOut > argIdx);
369 Err() <<
"the argument after '-" << flagStr <<
"' flag was not of the expected type.\n";
382 if (compSettings == 0)
385 if (compSettings >= 1 && compSettings <= 9) {
386 Warn() <<
"interpreting " << compSettings <<
" as " << 100 + compSettings
388 " This behavior is deprecated, please use the full compression settings.\n";
391 return (compSettings >= 100 && compSettings <= 509) && ((compSettings / 10) % 10 == 0);
419 const char *cur = arg + 1;
424 Warn() <<
"duplicate flag: -ff\n";
427 <<
"[err] Cannot specify both -ff and -f[0-9]. Either use the first input compression or specify it.\n";
434 Warn() <<
"duplicate flag: -fk\n";
438 if (isdigit(cur[0])) {
440 Err() <<
"cannot specify both -ff and -f[0-9]. Either use the first input compression or "
451 Err() << *compLv <<
" is not a supported compression settings.\n";
455 Err() <<
"failed to parse compression settings '" << cur <<
"' as an integer.\n";
459 Err() <<
"cannot specify -f[0-9] multiple times!\n";
463 Err() <<
"invalid flag: " << arg <<
"\n";
475static std::optional<HAddArgs>
ParseArgs(
int argc,
char **argv)
481 kParseFirstFlagGroup,
482 kParseFirstPosArgGroup,
483 kParseSecondFlagGroup,
484 } parseState = kParseStart;
486 for (
int argIdx = 1; argIdx < argc; ++argIdx) {
487 const char *argRaw = argv[argIdx];
492 if (argRaw[1] ==
'-' && argRaw[2] ==
'\0') {
494 if (parseState > kParseFirstFlagGroup) {
496 <<
"found `--`, but we've already parsed (or are still parsing) a sequence of positional arguments!"
497 " This is not supported: you must have exactly one sequence of positional arguments, so if you"
498 " need to use `--` make sure to pass *all* positional arguments after it.";
506 parseState = (parseState == kParseFirstPosArgGroup) ? kParseSecondFlagGroup : kParseFirstFlagGroup;
508 const char *arg = argRaw + 1;
509 bool validFlag =
false;
511#define PARSE_FLAG(func, ...) \
514 const auto res = func(__VA_ARGS__); \
515 if (res == EFlagResult::kErr) \
517 validFlag = res == EFlagResult::kParsed; \
544 Warn() <<
"unknown flag: " << argRaw <<
"\n";
549 assert(parseState < kParseFirstPosArgGroup);
550 parseState = kParseFirstPosArgGroup;
553 if (parseState == kParseFirstPosArgGroup) {
558 Err() <<
"seen a positional argument '" << argRaw
559 <<
"' after some flags."
560 " Positional arguments were already parsed at this point (from '"
562 <<
"' onwards), and you can only have one sequence of them, so you cannot pass more."
563 " Please group your positional arguments all together so that hadd works as you expect.\n"
565 for (
int i = 0; i < argc; ++i)
566 std::cerr << argv[i] <<
" ";
579 std::optional<Int_t> objectFilterType,
TFileMerger &fileMerger)
581 if (filterFileName) {
582 std::ifstream filterFile(*filterFileName);
584 Err() <<
"error opening filter file '" << *filterFileName <<
"'\n";
591 while (std::getline(filterFile,
line)) {
592 std::istringstream ss(
line);
596 if (!objPath.empty() && objPath[0] !=
'#') {
597 filteredObjects.
Append(objPath +
' ');
603 Info(2) <<
"added " << nObjects <<
" object from filter file '" << *filterFileName <<
"'\n";
606 Warn() <<
"no objects were added from filter file '" << *filterFileName <<
"'\n";
609 assert(objectFilterType.has_value());
610 const auto filterFlag = *objectFilterType;
619 const bool sourceHasProtocol = source.find_first_of(
"://") == std::string_view::npos;
620 const bool targetHasProtocol = target.find_first_of(
"://") == std::string_view::npos;
621 if (sourceHasProtocol != targetHasProtocol)
625 if (!sourceHasProtocol)
626 return source == target;
628 return std::filesystem::exists(target) && std::filesystem::equivalent(source, target);
635 const auto argsOpt =
ParseArgs(argc, argv);
662 nProcesses = s.
fCpus;
665 Info(2) <<
"parallelizing with " << nProcesses <<
" processes.\n";
668 std::string workingDir;
670 workingDir =
gSystem->TempDirectory();
672 Err() <<
"could not access the directory specified: " << *args.
fWorkingDir <<
".\n";
680 Err() <<
"-L must always be passed along with -Ltype.\n";
684 const char *targetname = 0;
686 Err() <<
"missing output file.\n";
687 fputs(kCommandLineShortHelp, stderr);
691 Err() <<
"missing input file.\n";
692 fputs(kCommandLineShortHelp, stderr);
697 Info(2) <<
"target file: " << targetname <<
"\n";
700 Info(2) <<
"Using " << cacheSize <<
"\n";
704 gSystem->Load(
"libTreePlayer");
709 if (maxopenedfiles > 0) {
715 std::vector<std::string> allSubfiles;
720 if (argv[
a] && argv[
a][0] ==
'@') {
721 std::ifstream indirect_file(argv[
a] + 1);
722 if (!indirect_file.is_open()) {
723 Err() <<
"could not open indirect file " << (argv[
a] + 1) << std::endl;
728 while (indirect_file) {
729 if (std::getline(indirect_file,
line) &&
line.length()) {
731 Err() <<
"could not validate the file name \"" <<
line <<
"\" within indirect file "
732 << (argv[
a] + 1) << std::endl;
736 Err() <<
"file " <<
line <<
" cannot be both the target and an input!\n";
740 allSubfiles.emplace_back(
line);
746 const char *
line = argv[
a];
748 Err() <<
"could not validate argument \"" <<
line <<
"\" as input file " << std::endl;
752 Err() <<
"file " <<
line <<
" cannot be both the target and an input!\n";
756 allSubfiles.emplace_back(
line);
760 if (allSubfiles.empty()) {
761 Err() <<
"could not find any valid input file " << std::endl;
769 if (firstInput && !firstInput->
IsZombie())
781 Info(2) <<
"compression setting for meta data: " << newcomp <<
'\n';
783 Info(2) <<
"compression setting for all output: " << newcomp <<
'\n';
786 if (!fileMerger.
OutputFile(targetname,
"UPDATE", newcomp)) {
787 Err() <<
"error opening target file for update :" << targetname <<
".\n";
791 std::stringstream ss;
792 ss <<
"error opening target file (does " << targetname <<
" exist?).\n";
794 ss <<
"pass \"-f\" argument to force re-creation of output file.\n";
799 auto step = (allSubfiles.size() + nProcesses - 1) / nProcesses;
800 if (multiproc && step < 3) {
803 nProcesses = (allSubfiles.size() + step - 1) / step;
804 Info(2) <<
"each process should handle at least 3 files for efficiency."
805 " Setting the number of processes to: "
806 << nProcesses << std::endl;
811 std::vector<std::string> partialFiles;
819 auto partialTail = uuid.AsString();
820 for (
auto i = 0; (i * step) < allSubfiles.size(); i++) {
821 std::stringstream buffer;
822 buffer << workingDir <<
"/partial" << i <<
"_" << partialTail <<
".root";
823 partialFiles.emplace_back(buffer.str());
830 merger.SetFastMethod(
kFALSE);
834 Warn() <<
"Sources and Target have different compression settings\n"
835 "hadd merging will be slower\n";
839 merger.SetMergeOptions(
TString(merger.GetMergeOptions()) +
" " + cacheSize);
842 merger.SetIOFeatures(features);
847 fileMergerFlags |= extraFlags;
852 Bool_t status = merger.PartialMerge(fileMergerFlags);
857 for (
auto i =
start; i < (start + nFiles) && i < static_cast<int>(allSubfiles.size()); i++) {
858 if (!merger.
AddFile(allSubfiles[i].c_str())) {
860 Warn() <<
"skipping file with error: " << allSubfiles[i] << std::endl;
862 Err() <<
"exiting due to error in " << allSubfiles[i] << std::endl;
867 return mergeFiles(merger);
870 auto parallelMerge = [&](
int start) {
874 if (maxopenedfiles > 0) {
878 Err() <<
"error opening target partial file\n";
881 return sequentialMerge(mergerP,
start, step);
884 auto reductionFunc = [&]() {
885 for (
const auto &pf : partialFiles) {
886 fileMerger.
AddFile(pf.c_str());
888 return mergeFiles(fileMerger);
896 auto res = p.
Map(parallelMerge,
ROOT::TSeqI(0, allSubfiles.size(), step));
897 status = std::accumulate(res.begin(), res.end(), 0U) == partialFiles.size();
899 status = reductionFunc();
901 Err() <<
"failed at the parallel stage\n";
904 for (
const auto &pf : partialFiles) {
909 status = sequentialMerge(fileMerger, 0, allSubfiles.size());
912 status = sequentialMerge(fileMerger, 0, allSubfiles.size());
917 <<
") input (partial) files into " << targetname <<
"\n";
920 Err() <<
"failure during the merge of " << allSubfiles.size() <<
" (" << fileMerger.
GetMergeList()->
GetEntries()
921 <<
") input (partial) files into " << targetname <<
"\n";
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
int Int_t
Signed integer 4 bytes (int).
bool Bool_t
Boolean (0=false, 1=true) (bool).
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
auto Map(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Execute a function without arguments several times.
TIOFeatures provides the end-user with the ability to change the IO behavior of data written via a TT...
bool Set(EIOFeatures bits)
Set a specific IO feature.
This class provides a simple interface to execute the same task multiple times in parallel,...
virtual Int_t GetEntries() const
This class provides file copy and merging services.
virtual Bool_t OutputFile(const char *url, Bool_t force)
Open merger output file.
virtual Bool_t AddFile(TFile *source, Bool_t own, Bool_t cpProgress)
Add the TFile to this file merger and give ownership of the TFile to this object (unless kFALSE is re...
void SetMsgPrefix(const char *prefix)
Set the prefix to be used when printing informational message.
void SetPrintLevel(Int_t level)
void AddObjectNames(const char *name)
Add object names for PartialMerge().
@ kAll
Merge all types of objects (default).
@ kIncremental
Merge the input file with the content of the output file (if already existing).
@ kSkipListed
Skip objects specified in fObjectNames list.
@ kOnlyListed
Only the objects specified in fObjectNames list.
@ kRegular
Normal merge, overwriting the output file.
void SetMergeOptions(const TString &options)
void SetMaxOpenedFiles(Int_t newmax)
Set a limit to the number of files that TFileMerger will open simultaneously.
@ kFailOnError
The merging process will stop and yield failure when encountering invalid objects.
@ kSkipOnError
The merging process will skip invalid objects and continue.
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Int_t GetCompressionSettings() const
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
TString & Append(const char *cs)
This class defines a UUID (Universally Unique IDentifier), also known as GUIDs (Globally Unique IDent...
static EFlagResult FlagArg(int argc, char **argv, int &argIdxInOut, const char *flagStr, std::optional< T > &flagOut, std::optional< T > defaultVal=std::nullopt, FlagConvResult< T >(*conv)(const char *)=ConvertArg< T >)
static bool ValidCompressionSettings(int compSettings)
FlagConvResult< IntFlag_t > ConvertArg< IntFlag_t >(const char *arg)
#define PARSE_FLAG(func,...)
static FlagConvResult< Int_t > ConvertFilterType(const char *arg)
static bool FilesAreEquivalent(std::string_view source, std::string_view target)
static Int_t ParseFilterFile(const std::optional< std::string > &filterFileName, std::optional< Int_t > objectFilterType, TFileMerger &fileMerger)
static FlagConvResult< T > ConvertArg(const char *)
static constexpr int kDefaultHaddVerbosity
static std::optional< HAddArgs > ParseArgs(int argc, char **argv)
FlagConvResult< ROOT::TIOFeatures > ConvertArg< ROOT::TIOFeatures >(const char *arg)
static FlagConvResult< TString > ConvertCacheSize(const char *arg)
static EFlagResult FlagF(const char *arg, HAddArgs &args)
static EFlagResult FlagToggle(const char *arg, const char *flagStr, bool &flagOut)
static std::optional< IntFlag_t > StrToUInt(const char *str)
static constexpr const char kCommandLineOptionsHelp[]
void ToHumanReadableSize(value_type bytes, Bool_t si, Double_t *coeff, const char **units)
Return the size expressed in 'human readable' format.
EFromHumanReadableSize FromHumanReadableSize(std::string_view str, T &value)
Convert strings like the following into byte counts: 5MB, 5 MB, 5M, 3.7GB, 123b, 456kB,...
bool fNoFlagsAfterPositionalArguments
bool fKeepCompressionAsIs
std::optional< TString > fCacheSize
std::optional< IntFlag_t > fCompressionSettings
std::optional< Int_t > fObjectFilterType
std::optional< IntFlag_t > fNProcesses
bool fUseFirstInputCompression
std::optional< std::string > fObjectFilterFile
std::optional< IntFlag_t > fVerbosity
std::optional< IntFlag_t > fMaxOpenedFiles
std::optional< std::string > fWorkingDir
std::optional< ROOT::TIOFeatures > fFeatures
@ kUseCompiledDefault
Use the compile-time default setting.