#include "TFileCollection.h"
#include "THashList.h"
#include "TFileInfo.h"
#include "TIterator.h"
#include "TMap.h"
#include "TObjString.h"
#include "TUri.h"
#include "TUrl.h"
#include "TSystem.h"
#include "Riostream.h"
#include "TRegexp.h"
#include "TPRegexp.h"
#include "TError.h"
// ROOT class-implementation macro for TFileCollection. The macro itself is
// defined outside this file; presumably it hooks the class into ROOT's
// dictionary machinery - confirm against the ROOT headers in use.
ClassImp(TFileCollection)
/// Build a named/titled collection and optionally seed it from a text file.
///
/// \param name      name forwarded to the TNamed base
/// \param title     title forwarded to the TNamed base
/// \param textfile  path forwarded to AddFromFile()
/// \param nfiles    forwarded to AddFromFile()
/// \param firstfile forwarded to AddFromFile()
///
/// The file list (a THashList) and the meta-data list (a TList) are both
/// created here and flagged as owners of their content.
TFileCollection::TFileCollection(const char *name, const char *title,
                                 const char *textfile, Int_t nfiles, Int_t firstfile)
   : TNamed(name, title), fList(0), fMetaDataList(0), fDefaultTree(),
     fTotalSize(0), fNFiles(0), fNStagedFiles(0), fNCorruptFiles(0)
{
   // Container for the TFileInfo entries.
   THashList *files = new THashList();
   files->SetOwner();
   fList = files;

   // Container for the collection-level meta data.
   TList *metas = new TList;
   metas->SetOwner();
   fMetaDataList = metas;

   // Populate from the text file (AddFromFile ignores an empty path).
   AddFromFile(textfile, nfiles, firstfile);
}
/// Destructor: releases the file list and the meta-data list.
/// Both lists are flagged as owners when created in the constructor, so
/// deleting them is expected to delete the objects they hold as well.
TFileCollection::~TFileCollection()
{
delete fList;
delete fMetaDataList;
}
/// Add a single TFileInfo to the collection.
///
/// \param info  entry to add; it is stored as-is (no copy is made)
/// \return 1 if the entry was inserted, 0 if the list or `info` is null or an
///         entry with the same name is already present.
///
/// When 1 is returned the entry sits in the owning file list; when 0 is
/// returned the list was not touched and `info` remains with the caller.
/// Update() is not called here.
Int_t TFileCollection::Add(TFileInfo *info)
{
   if (!fList || !info)
      return 0;

   if (fList->FindObject(info->GetName())) {
      // The current URL may be null (PrintDetailed() guards against it too);
      // fall back on the entry name so the diagnostic cannot crash.
      TUrl *url = info->GetCurrentUrl();
      Warning("Add", "file: '%s' already in the list - ignoring",
              url ? url->GetUrl() : info->GetName());
      return 0;
   }

   fList->Add(info);
   // Entries without a valid index get their (1-based) position in the list.
   if (info->GetIndex() < 0) info->SetIndex(fList->GetSize());
   return 1;
}
/// Append copies of all entries of another collection.
///
/// \param coll  source collection; its entries are copy-constructed
/// \return 1 on success, 0 if this list, `coll` or its list is null.
///
/// No duplicate check is performed and Update() is not called here.
Int_t TFileCollection::Add(TFileCollection *coll)
{
   if (!fList || !coll || !coll->GetList())
      return 0;

   TIter nextSrc(coll->GetList());
   for (TFileInfo *src = (TFileInfo *) nextSrc(); src; src = (TFileInfo *) nextSrc()) {
      TFileInfo *copy = new TFileInfo(*src);
      fList->Add(copy);
      // The decision is taken on the source entry's index.
      if (src->GetIndex() < 0) copy->SetIndex(fList->GetSize());
   }
   return 1;
}
/// Add the files listed, one per line, in a text file.
///
/// \param textfile  path of the text file (expanded via gSystem->ExpandPathName)
/// \param nfiles    maximum number of entries to add; <= 0 means all
/// \param firstfile 1-based rank, among the usable lines, of the first entry
///                  to add; only honoured when `nfiles` > 0
/// \return number of entries added.
///
/// Lines that are blank or start with '#' are skipped and not counted.
/// Update() is called once the file has been read.
Int_t TFileCollection::AddFromFile(const char *textfile, Int_t nfiles, Int_t firstfile)
{
   if (!fList)
      return 0;

   Int_t nAdded = 0;
   TString path(textfile);
   // Nothing to do for an empty path or when the path expansion reports failure.
   if (path.IsNull() || gSystem->ExpandPathName(path))
      return nAdded;

   ifstream in;
   in.open(path);
   if (!in.is_open()) {
      Error("AddFromFile", "unable to open file %s (%s)", textfile, path.Data());
      return nAdded;
   }

   const Bool_t takeAll = (nfiles <= 0) ? kTRUE : kFALSE;
   const Int_t firstWanted = (!takeAll && (firstfile < 1)) ? 1 : firstfile;
   Int_t nSeen = 0;   // usable (non-blank, non-comment) lines met so far
   while (in.good() && (takeAll || nAdded < nfiles)) {
      TString line;
      line.ReadToDelim(in);
      if (line.IsWhitespace() || line.BeginsWith("#"))
         continue;
      ++nSeen;
      if (!takeAll && nSeen < firstWanted)
         continue;
      TFileInfo *info = new TFileInfo(line);
      fList->Add(info);
      if (info->GetIndex() < 0) info->SetIndex(fList->GetSize());
      ++nAdded;
   }
   in.close();
   Update();
   return nAdded;
}
/// Add the file(s) matching a path, which may end in a wildcard pattern.
///
/// \param dir  either the path of a regular file, or `<directory>/<pattern>`
///             where every '*' in the pattern is turned into '.*' and the
///             result is matched, anchored, against the directory entries
/// \return number of entries actually inserted in the collection.
///
/// Every inserted entry gets the TFileInfo::kStaged bit. Entries refused by
/// Add(TFileInfo*) (e.g. duplicates) are deleted and not counted. Update()
/// is called after the insertion(s).
Int_t TFileCollection::Add(const char *dir)
{
   Int_t nf = 0;

   if (!fList)
      return nf;

   if (!dir || !*dir) {
      Error("Add", "input dir undefined");
      return nf;
   }

   FileStat_t st;
   FileStat_t tmp;
   // Directory part of the input, computed once and reused below.
   const TString baseDir = gSystem->DirName(dir);

   // Proceed only if either 'dir' itself or its parent directory exists.
   if (gSystem->GetPathInfo(dir, st) != 0 &&
       gSystem->GetPathInfo(baseDir, tmp) != 0)
      return nf;

   if (R_ISREG(st.fMode)) {
      // 'dir' designates a single regular file.
      TFileInfo *info = new TFileInfo(dir);
      info->SetBit(TFileInfo::kStaged);
      if (Add(info))
         nf++;
      else
         delete info;   // not taken by the list: release it here
      Update();
      return nf;
   }

   void *dataSetDir = gSystem->OpenDirectory(baseDir);
   if (!dataSetDir) {
      Error("Add", "directory %s cannot be opened", baseDir.Data());
      return nf;
   }

   // Turn the last path component into an anchored regular expression.
   TString filesExp(TString("^") + gSystem->BaseName(dir) + "$");
   filesExp.ReplaceAll("*",".*");
   TRegexp rg(filesExp);

   const char *ent;
   while ((ent = gSystem->GetDirEntry(dataSetDir))) {
      TString entryString(ent);
      if (entryString.Index(rg) == kNPOS)
         continue;

      TString fn(baseDir);
      fn += "/";
      fn += ent;
      // Look at 'st' only when the stat call succeeded; otherwise it would
      // still describe a previously inspected path.
      if (gSystem->GetPathInfo(fn, st) != 0 || !R_ISREG(st.fMode))
         continue;

      TFileInfo *info = new TFileInfo(fn);
      info->SetBit(TFileInfo::kStaged);
      if (Add(info))
         nf++;
      else
         delete info;   // not taken by the list: release it here
   }
   gSystem->FreeDirectory(dataSetDir);
   Update();

   return nf;
}
/// Drop entries whose UUID string has already been seen.
///
/// The first entry met for each UUID is moved into a fresh owning THashList
/// and renamed to its UUID string; whatever is left in the old list is
/// released together with that list. The new list then replaces fList.
///
/// \return number of entries removed.
Int_t TFileCollection::RemoveDuplicates()
{
   THashList *unique = new THashList;
   unique->SetOwner();

   const Int_t nBefore = fList->GetSize();
   TIter nextInfo(fList);
   for (TFileInfo *cur = (TFileInfo *) nextInfo(); cur; cur = (TFileInfo *) nextInfo()) {
      if (unique->FindObject(cur->GetUUID()->AsString()))
         continue;   // duplicate: left behind in the old list
      // First occurrence: detach it, key it on the UUID and keep it.
      fList->Remove(cur);
      cur->SetName(cur->GetUUID()->AsString());
      unique->Add(cur);
   }
   delete fList;
   fList = unique;

   const Int_t nRemoved = nBefore - fList->GetSize();
   if (nRemoved > 0)
      Info("RemoveDuplicates", "%d duplicates found and removed", nRemoved);
   return nRemoved;
}
/// Create a new collection holding the staged, non-corrupted entries.
///
/// \return a newly allocated collection with the same name and title as this
///         one, or 0 if this collection has no list. The caller is responsible
///         for deleting it.
///
/// The entries are copied: the returned collection has an owning list of its
/// own (see the constructor), so sharing the TFileInfo pointers with this
/// collection would get them deleted twice.
TFileCollection *TFileCollection::GetStagedSubset()
{
   if (!fList)
      return 0;

   TFileCollection *subset = new TFileCollection(GetName(), GetTitle());

   TIter iter(fList);
   TFileInfo *fileInfo = 0;
   while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next()))) {
      if (!fileInfo->TestBit(TFileInfo::kStaged) || fileInfo->TestBit(TFileInfo::kCorrupted))
         continue;

      TFileInfo *copy = new TFileInfo(*fileInfo);
      // Set the flag explicitly so the result does not depend on whether the
      // copy constructor propagates status bits.
      copy->SetBit(TFileInfo::kStaged);
      if (!subset->Add(copy))
         delete copy;   // refused (e.g. same name already present)
   }

   subset->Update();
   return subset;
}
/// Merge all TFileCollection objects of a list into this collection.
///
/// \param li  list of objects to merge
/// \return number of collections merged; 0 if `li` is null or empty; -1 as
///         soon as an object which is not a TFileCollection is met (entries
///         merged before that point stay in, and Update() is not run).
Long64_t TFileCollection::Merge(TCollection *li)
{
   if (!li || li->IsEmpty())
      return 0;

   Long64_t nMerged = 0;
   TIter nextObj(li);
   for (TObject *obj = nextObj(); obj; obj = nextObj()) {
      TFileCollection *other = dynamic_cast<TFileCollection *>(obj);
      if (!other) {
         Error("Add", "attempt to add object of class: %s to a %s",
               obj->ClassName(), this->ClassName());
         return -1;
      }
      Add(other);
      ++nMerged;
   }

   // Refresh the counters once, after everything has been added.
   Update();
   return nMerged;
}
/// Recompute the summary information of the collection.
///
/// Rebuilds the total size, the number of files, the number of staged and of
/// corrupted files, and the per-tree entry sums kept in the meta-data list.
/// Meta-data objects carrying the TFileInfoMeta::kExternal bit are preserved;
/// all the others are deleted and rebuilt.
///
/// \param avgsize  size to assume for entries whose size is not positive
/// \return -1 if there is no list; 0 if every entry had a positive size;
///         1 if some entry had no size and `avgsize` <= 0; 2 if some entry had
///         no size and `avgsize` was used instead.
Int_t TFileCollection::Update(Long64_t avgsize)
{
   if (!fList)
      return -1;

   Int_t status = 0;
   fTotalSize = 0;
   fNStagedFiles = 0;
   fNCorruptFiles = 0;

   // Throw away the internally generated meta data; it is rebuilt below.
   TIter nextMeta(fMetaDataList);
   for (TFileInfoMeta *old = (TFileInfoMeta *) nextMeta(); old;
        old = (TFileInfoMeta *) nextMeta()) {
      if (old->TestBit(TFileInfoMeta::kExternal))
         continue;
      fMetaDataList->Remove(old);
      delete old;
   }

   fNFiles = fList->GetEntries();

   TIter nextFile(fList);
   TFileInfo *file = 0;
   while ((file = dynamic_cast<TFileInfo *>(nextFile.Next()))) {

      // Size accounting
      const Long64_t fileSize = file->GetSize();
      if (fileSize > 0) {
         fTotalSize += fileSize;
      } else if (avgsize > 0) {
         status = 2;
         fTotalSize += avgsize;
      } else {
         status = 1;
      }

      const Bool_t corrupted = file->TestBit(TFileInfo::kCorrupted);

      // Only staged and sane files contribute to the tree sums
      if (file->TestBit(TFileInfo::kStaged) && !corrupted) {
         ++fNStagedFiles;

         if (file->GetMetaDataList()) {
            TIter nextObj(file->GetMetaDataList());
            for (TObject *obj = nextObj.Next(); obj; obj = nextObj.Next()) {
               // Objects of other types, or not describing a tree, are skipped
               TFileInfoMeta *perFile = dynamic_cast<TFileInfoMeta *>(obj);
               if (!perFile || !perFile->IsTree())
                  continue;

               TFileInfoMeta *sum =
                  dynamic_cast<TFileInfoMeta *>(fMetaDataList->FindObject(perFile->GetName()));
               if (sum) {
                  sum->SetEntries(sum->GetEntries() + perFile->GetEntries());
               } else {
                  // First file with this tree: start a new sum from its entries
                  sum = new TFileInfoMeta(perFile->GetName(), perFile->GetTitle());
                  fMetaDataList->Add(sum);
                  sum->SetEntries(perFile->GetEntries());
               }
            }
         }
      }

      if (corrupted)
         ++fNCorruptFiles;
   }

   return status;
}
/// Print a summary of the collection.
///
/// \param option  controls the output:
///   - if it contains `filter:` followed by letters among `SsCc` (delimited by
///     start/end of string, ';' or a blank), PrintDetailed() is called with
///     those letters and nothing else is printed;
///   - `M` (any case): also list the trees known to the collection and their
///     entries;
///   - `F` (any case): also print the file list; unless the option contains
///     `L`, " T:<default tree>" is appended to the option passed down when a
///     default tree name has been set explicitly.
void TFileCollection::Print(Option_t *option) const
{
   TString opt(option);
   TPMERegexp re("(^|;| )filter:([SsCc]+)( |;|$)", 4);
   if (re.Match(option) == 4) {
      TString showOnly = re[2];
      PrintDetailed(showOnly);
      return;
   }

   // GetDefaultTreeName() returns 0 when no tree is known; a null pointer must
   // not be handed to '%s', so use the same fallback text as PrintDetailed().
   const char *treeName = GetDefaultTreeName();
   Printf("TFileCollection %s - %s contains: %lld files with a size of"
          " %lld bytes, %.1f %% staged - default tree name: '%s'",
          GetName(), GetTitle(), fNFiles, fTotalSize, GetStagedPercentage(),
          (treeName ? treeName : "(no default tree)"));

   if (opt.Contains("M", TString::kIgnoreCase)) {
      Printf("The files contain the following trees:");

      TIter metaDataIter(fMetaDataList);
      TFileInfoMeta* metaData = 0;
      while ((metaData = dynamic_cast<TFileInfoMeta*>(metaDataIter.Next()))) {
         if (!metaData->IsTree())
            continue;
         Printf("Tree %s: %lld events", metaData->GetName(), metaData->GetEntries());
      }
   }

   if (fList && opt.Contains("F", TString::kIgnoreCase)) {
      Printf("The collection contains the following files:");
      if (!opt.Contains("L") && !fDefaultTree.IsNull())
         opt += TString::Format(" T:%s", fDefaultTree.Data());
      fList->Print(opt);
   }
}
/// Print a table of the files, optionally filtered on their status.
///
/// \param showOnly  any combination of 'S' (staged), 's' (not staged),
///                  'C' (corrupted) and 'c' (not corrupted). If neither 'S'
///                  nor 's' is given both are assumed; likewise for 'C'/'c'.
///
/// For each selected file the first line shows the status letters, the entries
/// of the first meta-data object ("n.a." if missing or not positive), the size
/// and the current URL; the remaining URLs follow on separate lines. A summary
/// and the default tree name close the output.
void TFileCollection::PrintDetailed(TString &showOnly) const
{
   Bool_t wantStaged    = (showOnly.Index('S') >= 0) ? kTRUE : kFALSE;
   Bool_t wantUnstaged  = (showOnly.Index('s') >= 0) ? kTRUE : kFALSE;
   Bool_t wantCorrupt   = (showOnly.Index('C') >= 0) ? kTRUE : kFALSE;
   Bool_t wantUncorrupt = (showOnly.Index('c') >= 0) ? kTRUE : kFALSE;

   // A criterion left out completely means "no restriction" on it.
   if (!wantUncorrupt && !wantCorrupt) wantUncorrupt = wantCorrupt = kTRUE;
   if (!wantUnstaged && !wantStaged) wantUnstaged = wantStaged = kTRUE;

   TIter nextInfo(fList);
   UInt_t nAll = 0;
   UInt_t nShown = 0;

   Printf("\033[1m #. SC | Entries | Size | URL\033[m");

   TString unit;
   Double_t value;
   TFileInfo *fi;
   while ((fi = dynamic_cast<TFileInfo *>(nextInfo.Next()))) {
      const Bool_t staged = fi->TestBit(TFileInfo::kStaged);
      const Bool_t corrupt = fi->TestBit(TFileInfo::kCorrupted);

      nAll++;

      const Bool_t stagedOk = staged ? wantStaged : wantUnstaged;
      const Bool_t corruptOk = corrupt ? wantCorrupt : wantUncorrupt;
      if (!stagedOk || !corruptOk)
         continue;

      TFileInfoMeta *meta = fi->GetMetaData();
      Int_t entries = -1;
      if (meta) entries = meta->GetEntries();

      FormatSize(fi->GetSize(), unit, value);

      // Main line: current URL plus all the information
      fi->ResetUrl();
      TUrl *current = fi->GetCurrentUrl();
      const char *currentStr = current ? current->GetUrl() : "n.a.";
      Printf("\033[1m%4u.\033[m %c%c | %-7s | %6.1lf %s | %s",
             ++nShown,
             (staged ? 'S' : 's'), (corrupt ? 'C' : 'c'),
             ((entries > 0) ? Form("% 7d", entries) : "n.a."),
             value, unit.Data(), currentStr);
      // Step over one URL, then list the remaining ones underneath
      fi->NextUrl();
      TUrl *other;
      while ((other = fi->NextUrl())) {
         Printf(" | | | %s", other->GetUrl());
      }
      fi->ResetUrl();
   }

   if (nAll == 0) {
      Printf(">> No files in dataset");
   } else {
      Printf(">> There are \033[1m%u\033[m file(s) in dataset: "
             "\033[1m%u (%5.1f%%)\033[m matched your criteria (%s)",
             nAll, nShown,
             100.*(Float_t)nShown/(Float_t)nAll, showOnly.Data());

      FormatSize(fTotalSize, unit, value);
      Printf(">> Total size : \033[1m%.1f %s\033[m", value, unit.Data());
      Printf(">> Staged (S) : \033[1m%5.1f %%\033[m", GetStagedPercentage());
      Printf(">> Corrupted (C) : \033[1m%5.1f %%\033[m",
             GetCorruptedPercentage());
   }

   const char *treeName = GetDefaultTreeName();
   Printf(">> Default tree : \033[1m%s\033[m",
          (treeName ? treeName : "(no default tree)"));
}
/// Scale a size in bytes to the largest binary unit, up to TiB, for which the
/// value stays >= 1 (sizes below 1024 are left in bytes).
///
/// \param bytes      size to format
/// \param[out] um    unit label: "byt", "KiB", "MiB", "GiB" or "TiB"
/// \param[out] size  value expressed in that unit
void TFileCollection::FormatSize(Long64_t bytes, TString &um,
                                 Double_t &size) const
{
   static const char *kUnits[] = { "byt", "KiB", "MiB", "GiB", "TiB" };
   const Int_t lastUnit = sizeof(kUnits) / sizeof(const char *) - 1;

   Double_t scaled = bytes;
   Int_t idx = 0;
   // Divide by 1024 until the value is small enough or units run out.
   for (; idx < lastUnit && scaled >= 1024.; ++idx)
      scaled /= 1024.;

   um = kUnits[idx];
   size = scaled;
}
void TFileCollection::SetAnchor(const char *anchor)
{
if (!fList)
return;
TIter iter(fList);
TFileInfo *fileInfo = 0;
while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next()))) {
fileInfo->ResetUrl();
TUrl *url = 0;
while ((url = fileInfo->NextUrl()))
url->SetAnchor(anchor);
fileInfo->ResetUrl();
}
}
/// Set the status bit(s) `f` on every TFileInfo of the collection.
/// The scan ends at the first list element which is not a TFileInfo.
void TFileCollection::SetBitAll(UInt_t f)
{
   if (!fList)
      return;

   TIter nextInfo(fList);
   for (TFileInfo *fi = dynamic_cast<TFileInfo *>(nextInfo.Next()); fi;
        fi = dynamic_cast<TFileInfo *>(nextInfo.Next())) {
      fi->SetBit(f);
   }
}
/// Clear the status bit(s) `f` on every TFileInfo of the collection.
/// The scan ends at the first list element which is not a TFileInfo.
void TFileCollection::ResetBitAll(UInt_t f)
{
   if (!fList)
      return;

   TIter nextInfo(fList);
   for (TFileInfo *fi = dynamic_cast<TFileInfo *>(nextInfo.Next()); fi;
        fi = dynamic_cast<TFileInfo *>(nextInfo.Next())) {
      fi->ResetBit(f);
   }
}
const char *TFileCollection::GetDefaultTreeName() const
{
if (fDefaultTree.Length() > 0)
return fDefaultTree;
TIter metaDataIter(fMetaDataList);
TFileInfoMeta *metaData = 0;
while ((metaData = dynamic_cast<TFileInfoMeta*>(metaDataIter.Next()))) {
if (!metaData->IsTree())
continue;
return metaData->GetName();
}
return 0;
}
/// Total number of entries recorded for a tree.
///
/// \param tree  tree name; if null or empty the default tree name is used
/// \return the entries stored in the matching collection-level meta data, or
///         -1 if no tree name is available or no such meta data exists.
Long64_t TFileCollection::GetTotalEntries(const char *tree) const
{
   const char *wanted = (tree && *tree) ? tree : GetDefaultTreeName();
   if (!wanted)
      return -1;

   TFileInfoMeta *sum = dynamic_cast<TFileInfoMeta *>(fMetaDataList->FindObject(wanted));
   return sum ? sum->GetEntries() : -1;
}
/// Look up a collection-level meta-data object by name.
///
/// \param meta  name to search for in the meta-data list
/// \return the object found if it is a TFileInfoMeta; 0 if `meta` is null or
///         empty, nothing is found, or the object is of another type.
TFileInfoMeta *TFileCollection::GetMetaData(const char *meta) const
{
   const Bool_t haveName = (meta && *meta) ? kTRUE : kFALSE;
   if (haveName)
      return dynamic_cast<TFileInfoMeta *>(fMetaDataList->FindObject(meta));
   return 0;
}
void TFileCollection::SetDefaultMetaData(const char *meta)
{
TFileInfoMeta *fim = GetMetaData(meta);
if (fim) {
fMetaDataList->Remove(fim);
fMetaDataList->AddFirst(fim);
}
}
/// Remove meta data from every file and from the collection itself.
///
/// \param meta  name of the meta data to remove; when null, the call is
///              forwarded as such to each file and the collection-level
///              meta-data list is cleared entirely
void TFileCollection::RemoveMetaData(const char *meta)
{
   // Per-file meta data
   if (fList) {
      TIter iter(fList);
      TFileInfo *fileInfo = 0;
      while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
         fileInfo->RemoveMetaData(meta);
   }

   // Collection-level meta data
   if (meta) {
      // Search for the name passed by the caller (not for a fixed string)
      TObject* obj = fMetaDataList->FindObject(meta);
      if (obj) {
         fMetaDataList->Remove(obj);
         delete obj;
      }
   } else
      fMetaDataList->Clear();
}
/// Sort the file list.
///
/// \param useindex  if true the TFileInfo::kSortWithIndex bit is set on all
///                  entries before sorting, otherwise it is cleared
///                  (presumably the bit selects the comparison criterion used
///                  by the entries - confirm in TFileInfo).
void TFileCollection::Sort(Bool_t useindex)
{
   if (fList) {
      useindex ? SetBitAll(TFileInfo::kSortWithIndex)
               : ResetBitAll(TFileInfo::kSortWithIndex);
      fList->Sort();
   }
}
/// Build a one-line textual summary of the collection.
///
/// \param name  dataset name to show; if null or empty GetName() is used
/// \param popt  1 selects a free-form sentence; any other value selects a
///              '|'-separated table row (the tree column is padded or cut to
///              25 characters only when `popt` is 0)
/// \return a newly allocated TObjString; the caller is responsible for it.
///
/// The summary reports the number of files, the total size scaled to
/// kB/MB/GB/TB, the staged percentage and the default tree information.
TObjString *TFileCollection::ExportInfo(const char *name, Int_t popt)
{
   // Tree information
   TString treeInfo;
   const char *defTree = GetDefaultTreeName();
   if (!defTree) {
      treeInfo = " N/A";
   } else {
      TFileInfoMeta *meta = GetMetaData(defTree);
      if (popt == 1) {
         treeInfo = defTree;
         if (meta)
            treeInfo += TString::Format(", %lld entries", meta->GetEntries());
         // Stored in permille, shown in percent
         TFileInfoMeta *frac = GetMetaData("/FractionOfTotal");
         if (frac)
            treeInfo += TString::Format(", %3.1f %% of total", frac->GetEntries() / 10.);
      } else {
         // Fixed 14-character column; '>' flags a name which had to be cut
         treeInfo.Form(" %s ", defTree);
         if (treeInfo.Length() > 14) treeInfo.Replace(13, 1, '>');
         treeInfo.Resize(14);
         if (meta) {
            const Long64_t nEntries = meta->GetEntries();
            if (nEntries > 99999999)
               treeInfo += TString::Format("| %8lld ", nEntries);
            else
               treeInfo += TString::Format("| %8.4g ", (Double_t) nEntries);
         }
      }
   }
   if (popt == 0) treeInfo.Resize(25);

   // Scale the total size to the most suitable unit
   const char *unit[4] = {"kB", "MB", "GB", "TB"};
   Int_t iUnit = 0;
   Long64_t divisor = 1024;
   Long64_t scaled = (Long64_t) (GetTotalSize() / divisor);
   for (; scaled > 1024 && iUnit < 3; ++iUnit) {
      divisor *= 1024;
      scaled = (Long64_t) (GetTotalSize() / divisor);
   }

   // Dataset name to display
   TString dsname(name);
   if (dsname.IsNull()) dsname = GetName();

   const char *line =
      (popt == 1)
         ? Form("%s %lld files, %lld %s, staged %d %%, tree: %s", dsname.Data(),
                GetNFiles(), scaled, unit[iUnit],
                (Int_t)GetStagedPercentage(), treeInfo.Data())
         : Form("%s| %7lld |%s| %5lld %s | %3d %%", dsname.Data(),
                GetNFiles(), treeInfo.Data(), scaled, unit[iUnit],
                (Int_t)GetStagedPercentage());
   return new TObjString(line);
}
/// Extract the files which have a URL on a given server.
///
/// \param server  server specification; a missing scheme defaults to "root"
///                and the host is expanded to its fully qualified name
/// \return a newly allocated collection (owned by the caller) holding, for
///         each matching file, a new TFileInfo built from the matching URL
///         with size, UUID, MD5, meta data and staged/corrupted bits taken
///         from the original; 0 if `server` is undefined, the list is empty
///         or no file matches.
///
/// The returned collection carries an external "FractionOfTotal" meta data
/// whose entries are the size fraction, in permille, relative to this
/// collection; 0 is stored when the total size of this collection is not
/// positive.
TFileCollection *TFileCollection::GetFilesOnServer(const char *server)
{
   TFileCollection *fc = (TFileCollection *)0;

   // The server specification is mandatory
   if (!server || strlen(server) <= 0) {
      Info("GetFilesOnServer", "server undefined - do nothing");
      return fc;
   }

   // Nothing to do for empty lists
   if (!fList || fList->GetSize() <= 0) {
      Info("GetFilesOnServer", "the list is empty - do nothing");
      return fc;
   }

   // Normalised server reference: <scheme>://<fqdn>[:<port>]
   TUri uri(server);
   TString srv, scheme("root"), port;
   if (uri.GetScheme() != "") scheme = uri.GetScheme();
   if (uri.GetPort() != "") port.Form(":%s", uri.GetPort().Data());
   srv.Form("%s://%s%s", scheme.Data(), TUrl(server).GetHostFQDN(), port.Data());
   if (gDebug > 0)
      Info("GetFilesOnServer", "searching for files on server: '%s' (input: '%s')",
           srv.Data(), server);

   // Output object
   fc = new TFileCollection(GetName());
   TString title;
   if (GetTitle() && strlen(GetTitle()) > 0) {
      title.Form("%s (subset on server %s)", GetTitle(), srv.Data());
   } else {
      title.Form("subset of '%s' on server %s", GetName(), srv.Data());
   }
   fc->SetTitle(title.Data());
   fc->SetDefaultTreeName(GetDefaultTreeName());

   // Search pattern: URLs starting with the server reference
   srv.Insert(0, "^");

   TIter nxf(fList);
   TFileInfo *fi = 0;
   while ((fi = (TFileInfo *)nxf())) {
      TUrl *xu = 0;
      if ((xu = fi->FindByUrl(srv.Data()))) {
         TFileInfo *nfi = new TFileInfo(xu->GetUrl(), fi->GetSize(),
                                        fi->GetUUID() ? fi->GetUUID()->AsString() : 0,
                                        fi->GetMD5() ? fi->GetMD5()->AsString() : 0);
         if (fi->GetMetaDataList()) {
            TIter nxm(fi->GetMetaDataList());
            TFileInfoMeta *md = 0;
            while ((md = (TFileInfoMeta *) nxm())) {
               nfi->AddMetaData(new TFileInfoMeta(*md));
            }
         }
         if (fi->TestBit(TFileInfo::kStaged)) nfi->SetBit(TFileInfo::kStaged);
         if (fi->TestBit(TFileInfo::kCorrupted)) nfi->SetBit(TFileInfo::kCorrupted);
         if (gDebug > 1)
            Info("GetFilesOnServer", "adding: %s", xu->GetUrl());
         // Add() leaves the object with us when it refuses it: release it
         if (!fc->Add(nfi))
            delete nfi;
      }
   }

   // Nothing found: drop the output object
   if (fc->GetList()->GetSize() <= 0) {
      delete fc;
      fc = 0;
      Info("GetFilesOnServer", "dataset '%s' has no files on server: '%s' (searched for: '%s')",
           GetName(), server, srv.Data());
   }

   // Fill the sums of the sub-collection
   if (fc) {
      fc->Update();
      // Fraction of total in permille; a non-positive total would otherwise
      // lead to an integer division by zero
      const Long64_t total = GetTotalSize();
      Long64_t xf = (total > 0) ? (fc->GetTotalSize() * 1000) / total : 0;
      TFileInfoMeta *m = new TFileInfoMeta("FractionOfTotal", "External Info", xf);
      m->SetBit(TFileInfoMeta::kExternal);
      fc->AddMetaData(m);
   }

   return fc;
}
/// Split the collection per server.
///
/// \param exclude   comma-separated list of servers to leave out; each one is
///                  normalised to <scheme>://<fqdn>[:<port>] with "root" as
///                  default scheme
/// \param curronly  if true only the current URL of each file is considered,
///                  otherwise all its URLs are
/// \return a newly allocated TMap (owned by the caller) associating a
///         TObjString server key with a TFileCollection of the files on that
///         server; 0 if the list is empty.
///
/// Each sub-collection carries an external "FractionOfTotal" meta data whose
/// entries are the size fraction, in permille, relative to this collection;
/// 0 is stored when the total size of this collection is not positive.
/// The current URL of every file is restored after the scan.
///
/// NOTE(review): the port is appended to the server key only when an
/// exclusion list is given, so the keys of the map can differ depending on
/// whether `exclude` is used - confirm this is intended.
TMap *TFileCollection::GetFilesPerServer(const char *exclude, Bool_t curronly)
{
   TMap *dsmap = 0;

   // Nothing to do for empty lists
   if (!fList || fList->GetSize() <= 0) {
      Info("GetFilesPerServer", "the list is empty - do nothing");
      return dsmap;
   }

   // Normalised list of the servers to be excluded
   THashList *excl = 0;
   if (exclude && strlen(exclude) > 0) {
      excl = new THashList;
      excl->SetOwner();
      TUri uri;
      TString srvs(exclude), s, srv, scheme, port;
      Int_t from = 0;
      while (srvs.Tokenize(s, from, ",")) {
         uri.SetUri(s.Data());
         scheme = "root";
         port = "";
         if (uri.GetScheme() != "") scheme = uri.GetScheme();
         if (uri.GetPort() != "") port.Form(":%s", uri.GetPort().Data());
         srv.Form("%s://%s%s", scheme.Data(), TUrl(s.Data()).GetHostFQDN(), port.Data());
         excl->Add(new TObjString(srv.Data()));
      }
   }

   // Output map
   dsmap = new TMap();

   TIter nxf(fList);
   TFileInfo *fi = 0;
   TUri uri;
   TString key;
   TFileCollection *fc = 0;
   while ((fi = (TFileInfo *)nxf())) {
      // Remember the current URL so that it can be restored afterwards
      TUrl *curl = fi->GetCurrentUrl();
      if (!curronly) fi->ResetUrl();
      TUrl *xurl = 0;
      while ((xurl = (curronly) ? curl : fi->NextUrl())) {
         // Key for this server
         key.Form("%s://%s", xurl->GetProtocol(), xurl->GetHostFQDN());
         // Skip excluded servers
         if (excl && excl->FindObject(key.Data())) {
            if (curronly) break;
            continue;
         } else if (excl && xurl->GetPort() > 0) {
            // Complete the key with the port and check again
            key += TString::Format(":%d", xurl->GetPort());
            if (excl->FindObject(key.Data())) {
               if (curronly) break;
               continue;
            }
         }
         // Find the sub-collection of this server, creating it if needed
         TPair *ent = 0;
         if (!(ent = (TPair *) dsmap->FindObject(key.Data()))) {
            fc = new TFileCollection(GetName());
            TString title;
            if (GetTitle() && strlen(GetTitle()) > 0) {
               title.Form("%s (subset on server %s)", GetTitle(), key.Data());
            } else {
               title.Form("subset of '%s' on server %s", GetName(), key.Data());
            }
            fc->SetTitle(title.Data());
            fc->SetDefaultTreeName(GetDefaultTreeName());
            dsmap->Add(new TObjString(key.Data()), fc);
            if (gDebug > 0)
               Info("GetFilesPerServer", "found server: '%s' (fc: %p)", key.Data(), fc);
         } else {
            fc = (TFileCollection *) ent->Value();
         }
         // New TFileInfo for this URL, with the attributes of the original
         TFileInfo *nfi = new TFileInfo(xurl->GetUrl(kTRUE), fi->GetSize(),
                                        fi->GetUUID() ? fi->GetUUID()->AsString() : 0,
                                        fi->GetMD5() ? fi->GetMD5()->AsString() : 0);
         if (fi->GetMetaDataList()) {
            TIter nxm(fi->GetMetaDataList());
            TFileInfoMeta *md = 0;
            while ((md = (TFileInfoMeta *) nxm())) {
               nfi->AddMetaData(new TFileInfoMeta(*md));
            }
         }
         if (fi->TestBit(TFileInfo::kStaged)) nfi->SetBit(TFileInfo::kStaged);
         if (fi->TestBit(TFileInfo::kCorrupted)) nfi->SetBit(TFileInfo::kCorrupted);
         // Add() leaves the object with us when it refuses it: release it
         if (!fc->Add(nfi))
            delete nfi;
         // In current-URL-only mode we are done with this file
         if (curronly) break;
      }
      // Restore the current URL
      fi->SetCurrentUrl(curl);
   }

   // Fill the sums of the sub-collections
   const Long64_t total = GetTotalSize();
   TIter nxk(dsmap);
   TObject *k = 0;
   while ((k = nxk()) && (fc = (TFileCollection *) dsmap->GetValue(k))) {
      fc->Update();
      // Fraction of total in permille; a non-positive total would otherwise
      // lead to an integer division by zero
      Long64_t xf = (total > 0) ? (fc->GetTotalSize() * 1000) / total : 0;
      TFileInfoMeta *m = new TFileInfoMeta("FractionOfTotal", "External Info", xf);
      m->SetBit(TFileInfoMeta::kExternal);
      fc->AddMetaData(m);
   }

   // Cleanup
   if (excl) delete excl;

   return dsmap;
}
/// Add an object to the collection-level meta-data list.
///
/// \param meta  object to add; stored as-is in the (owning) list
/// \return kTRUE if the object was added, kFALSE if `meta` is null.
///
/// The list is created on demand, flagged as owner, if it does not exist yet.
Bool_t TFileCollection::AddMetaData(TObject *meta)
{
   if (!meta)
      return kFALSE;

   if (!fMetaDataList) {
      TList *fresh = new TList;
      fresh->SetOwner();
      fMetaDataList = fresh;
   }
   fMetaDataList->Add(meta);
   return kTRUE;
}