Logo ROOT   6.14/05
Reference Guide
TFileCollection.cxx
Go to the documentation of this file.
1 // @(#)root/base:$Id$
2 // Author: Gerhard Erich Bruckner, Jan Fiete Grosse-Oetringhaus 04/06/07
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2007, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /** \class TFileCollection
13 \ingroup Base
14 
15 Class that contains a list of TFileInfo's and accumulated meta
16 data information about its entries. This class is used to describe
17 file sets as stored by Grid file catalogs, by PROOF or any other
18 collection of TFile names.
19 */
20 
21 #include "TFileCollection.h"
22 #include "THashList.h"
23 #include "TFileInfo.h"
24 #include "TIterator.h"
25 #include "TMap.h"
26 #include "TObjString.h"
27 #include "TUri.h"
28 #include "TUrl.h"
29 #include "TSystem.h"
30 #include "Riostream.h"
31 #include "TRegexp.h"
32 #include "TPRegexp.h"
33 #include "TError.h"
34 
35 
37 
38 ////////////////////////////////////////////////////////////////////////////////
39 /// TFileCollection constructor. Specify a name and title describing
40 /// the list. If textfile is specified the file is opened and a
41 /// TFileCollection is created containing the files in the textfile.
42 
43 TFileCollection::TFileCollection(const char *name, const char *title,
44  const char *textfile, Int_t nfiles, Int_t firstfile)
45  : TNamed(name, title), fList(0), fMetaDataList(0), fDefaultTree(),
46  fTotalSize(0), fNFiles(0), fNStagedFiles(0), fNCorruptFiles(0)
47 {
48  fList = new THashList();
49  fList->SetOwner();
50 
51  fMetaDataList = new TList;
53 
54  AddFromFile(textfile, nfiles, firstfile);
55 }
56 
57 ////////////////////////////////////////////////////////////////////////////////
58 /// Cleanup.
59 
61 {
62  delete fList;
63  delete fMetaDataList;
64 }
65 
66 ////////////////////////////////////////////////////////////////////////////////
67 /// Add TFileInfo to the collection.
68 
70 {
71  if (fList && info) {
72  if (!fList->FindObject(info->GetName())) {
73  fList->Add(info);
74  if (info->GetIndex() < 0) info->SetIndex(fList->GetSize());
75  Update();
76  return 1;
77  } else {
78  Warning("Add", "file: '%s' already in the list - ignoring",
79  info->GetCurrentUrl()->GetUrl());
80  }
81  }
82  return 0;
83 }
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 /// Add content of the TFileCollection to this collection.
87 
89 {
90  if (fList && coll && coll->GetList()) {
91  TIter nxfi(coll->GetList());
92  TFileInfo *fi = 0;
93  while ((fi = (TFileInfo *) nxfi())) {
94  TFileInfo *info = new TFileInfo(*fi);
95  fList->Add(info);
96  if (fi->GetIndex() < 0) info->SetIndex(fList->GetSize());
97  }
98  Update();
99  return 1;
100  } else {
101  return 0;
102  }
103 }
104 
105 ////////////////////////////////////////////////////////////////////////////////
106 /// Add file names contained in the specified text file.
107 /// The file should contain one url per line; empty lines or lines starting with '#'
108 /// (commented lines) are ignored.
109 /// If nfiles > 0 only nfiles files are added, starting from file 'firstfile' (>= 1).
110 /// The method returns the number of added files.
111 
112 Int_t TFileCollection::AddFromFile(const char *textfile, Int_t nfiles, Int_t firstfile)
113 {
114  if (!fList)
115  return 0;
116 
117  Int_t nf = 0;
118  TString fn(textfile);
119  if (!fn.IsNull() && !gSystem->ExpandPathName(fn)) {
120  std::ifstream f;
121  f.open(fn);
122  if (f.is_open()) {
123  Bool_t all = (nfiles <= 0) ? kTRUE : kFALSE;
124  Int_t ff = (!all && (firstfile < 1)) ? 1 : firstfile;
125  Int_t nn = 0;
126  while (f.good() && (all || nf < nfiles)) {
127  TString line;
128  line.ReadToDelim(f);
129  // Skip commented or empty lines
130  if (!line.IsWhitespace() && !line.BeginsWith("#")) {
131  nn++;
132  if (all || nn >= ff) {
133  TFileInfo *info = new TFileInfo(line);
134  fList->Add(info);
135  if (info->GetIndex() < 0) info->SetIndex(fList->GetSize());
136  nf++;
137  }
138  }
139  }
140  f.close();
141  Update();
142  } else
143  Error("AddFromFile", "unable to open file %s (%s)", textfile, fn.Data());
144  }
145  return nf;
146 }
147 
148 ////////////////////////////////////////////////////////////////////////////////
149 /// Add all files matching the specified pattern to the collection.
150 /// 'dir' can include wildcards after the last slash, which causes all
151 /// matching files in that directory to be added.
152 /// If dir is the full path of a file, only one element is added.
153 /// Return value is the number of added files.
154 
155 Int_t TFileCollection::Add(const char *dir)
156 {
157  Int_t nf = 0;
158 
159  if (!fList)
160  return nf;
161 
162  if (!dir || !*dir) {
163  Error("Add", "input dir undefined");
164  return nf;
165  }
166 
167  FileStat_t st;
168  FileStat_t tmp;
169  TString baseDir = gSystem->DirName(dir);
170  // if the 'dir' or its base dir exist
171  if (gSystem->GetPathInfo(dir, st) == 0 ||
172  gSystem->GetPathInfo(baseDir, tmp) == 0) {
173  // If 'dir' points to a single file, add to the list and exit
174  if (R_ISREG(st.fMode)) {
175  // regular, single file
176  TFileInfo *info = new TFileInfo(dir);
177  info->SetBit(TFileInfo::kStaged);
178  Add(info);
179  nf++;
180  Update();
181  return nf;
182  } else {
183  void *dataSetDir = gSystem->OpenDirectory(gSystem->DirName(dir));
184  if (!dataSetDir) {
185  // directory cannot be opened
186  Error("Add", "directory %s cannot be opened",
187  gSystem->DirName(dir));
188  } else {
189  const char *ent;
190  TString filesExp(TString("^") + gSystem->BaseName(dir) + "$");
191  filesExp.ReplaceAll("*",".*");
192  TRegexp rg(filesExp);
193  while ((ent = gSystem->GetDirEntry(dataSetDir))) {
194  TString entryString(ent);
195  if (entryString.Index(rg) != kNPOS) {
196  // matching dir entry
197  TString fn = gSystem->DirName(dir);
198  fn += "/";
199  fn += ent;
200  gSystem->GetPathInfo(fn, st);
201  if (R_ISREG(st.fMode)) {
202  // regular file
203  TFileInfo *info = new TFileInfo(fn);
204  info->SetBit(TFileInfo::kStaged);
205  Add(info);
206  nf++;
207  }
208  }
209  }
210  // close the directory
211  gSystem->FreeDirectory(dataSetDir);
212  Update();
213  }
214  }
215  }
216  return nf;
217 }
218 
219 ////////////////////////////////////////////////////////////////////////////////
220 /// Remove duplicates based on the UUID, typically after a verification.
221 /// Return the number of entries removed.
222 
224 {
225  THashList *hl = new THashList;
226  hl->SetOwner();
227 
228  Int_t n0 = fList->GetSize();
229  TIter nxfi(fList);
230  TFileInfo *fi = 0;
231  while ((fi = (TFileInfo *)nxfi())) {
232  if (!(hl->FindObject(fi->GetUUID()->AsString()))) {
233  // We hash on the UUID
234  fList->Remove(fi);
235  fi->SetName(fi->GetUUID()->AsString());
236  hl->Add(fi);
237  }
238  }
239  delete fList;
240  fList = hl;
241  // How many removed?
242  Int_t nr = n0 - fList->GetSize();
243  if (nr > 0)
244  Info("RemoveDuplicates", "%d duplicates found and removed", nr);
245  // Done
246  return nr;
247 }
248 
249 ////////////////////////////////////////////////////////////////////////////////
250 /// Creates a subset of the files that have the kStaged & !kCorrupted bit set.
251 
253 {
254  if (!fList)
255  return 0;
256 
257  TFileCollection *subset = new TFileCollection(GetName(), GetTitle());
258 
259  TIter iter(fList);
260  TFileInfo *fileInfo = 0;
261  while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next()))) {
262  if (fileInfo->TestBit(TFileInfo::kStaged) && !fileInfo->TestBit(TFileInfo::kCorrupted))
263  subset->Add(fileInfo);
264  }
265 
266  subset->Update();
267 
268  return subset;
269 }
270 
271 ////////////////////////////////////////////////////////////////////////////////
272 /// Merge all TFileCollection objects in li into this TFileCollection object.
273 /// Updates counters at the end.
274 /// Returns the number of merged collections or -1 in case of error.
275 
277 {
278 
279  if (!li) return 0;
280  if (li->IsEmpty()) return 0;
281 
282  Long64_t nentries=0;
283  TIter next(li);
284  while (TObject *o = next()) {
285  TFileCollection* coll = dynamic_cast<TFileCollection*> (o);
286  if (!coll) {
287  Error("Add", "attempt to add object of class: %s to a %s",
288  o->ClassName(),this->ClassName());
289  return -1;
290  }
291  Add(coll);
292  nentries++;
293  }
294  Update();
295 
296  return nentries;
297 }
298 
299 ////////////////////////////////////////////////////////////////////////////////
300 /// Update accumulated information about the elements of the collection
301 /// (e.g. fTotalSize). If 'avgsize' > 0, use an average file size of 'avgsize'
302 /// bytes when the size info is not available.
303 /// Also updates the meta data information by summarizing
304 /// the meta data of the contained objects.
305 /// Return -1 in case of any failure, 0 if the total size is exact, 1 if
306 /// incomplete, 2 if complete but (at least partially) estimated.
307 
309 {
310  if (!fList)
311  return -1;
312 
313  Int_t rc = 0;
314 
315  fTotalSize = 0;
316  fNStagedFiles = 0;
317  fNCorruptFiles = 0;
318 
319  // Clear internal meta information which is going to be rebuilt in this
320  // function
321  TIter nxm(fMetaDataList);
322  TFileInfoMeta *m = 0;
323  while ((m = (TFileInfoMeta *)nxm())) {
324  if (!(m->TestBit(TFileInfoMeta::kExternal))) {
325  fMetaDataList->Remove(m);
326  delete m;
327  }
328  }
329 
330  fNFiles = fList->GetEntries();
331 
332  TIter iter(fList);
333  TFileInfo *fileInfo = 0;
334  while ((fileInfo = dynamic_cast<TFileInfo*> (iter.Next()))) {
335 
336  if (fileInfo->GetSize() > 0) {
337  fTotalSize += fileInfo->GetSize();
338  } else {
339  rc = 1;
340  if (avgsize > 0) {
341  rc = 2;
342  fTotalSize += avgsize;
343  }
344  }
345 
346  if (fileInfo->TestBit(TFileInfo::kStaged) && !fileInfo->TestBit(TFileInfo::kCorrupted)) {
347  fNStagedFiles++;
348 
349  if (fileInfo->GetMetaDataList()) {
350  TIter metaDataIter(fileInfo->GetMetaDataList());
351  // other than TFileInfoMeta is also allowed in list
352  TObject *obj = 0;
353  while ((obj = metaDataIter.Next())) {
354  TFileInfoMeta *metaData = dynamic_cast<TFileInfoMeta*>(obj);
355  if (!metaData)
356  continue;
357  if (!metaData->IsTree())
358  continue;
359 
360  // find corresponding entry in TFileCollection's meta data
361  TFileInfoMeta *metaDataSum = dynamic_cast<TFileInfoMeta*>(fMetaDataList->FindObject(metaData->GetName()));
362  Bool_t newObj = kFALSE;
363  if (!metaDataSum) {
364  // create explicitly, there are some values that do not make sense for the sum
365  metaDataSum = new TFileInfoMeta(metaData->GetName(), metaData->GetTitle());
366  fMetaDataList->Add(metaDataSum);
367  newObj = kTRUE;
368  }
369 
370  // sum the values
371  if (newObj)
372  metaDataSum->SetEntries(metaData->GetEntries());
373  else
374  metaDataSum->SetEntries(metaDataSum->GetEntries() + metaData->GetEntries());
375  }
376  }
377  }
378  if (fileInfo->TestBit(TFileInfo::kCorrupted))
379  fNCorruptFiles++;
380  }
381 
382  // Done
383  return rc;
384 }
385 
386 ////////////////////////////////////////////////////////////////////////////////
387 /// Prints the contents of the TFileCollection.
388 /// If option contains:
389 ///
390 /// - 'M' print global meta information
391 /// - 'F' print all the files in the collection in compact form
392 /// (current url, default tree name|class|entries, md5)
393 /// - 'L' together with 'F', print all the files in the collection
394 /// in long form (uuid, md5, all URLs, all meta objects; on
395 /// many lines)
396 /// - "filter:[SsCc]" invokes PrintDetailed() which prints out dataset
397 /// content in a formatted fashion by filtering on files
398 /// which are (S)taged or not (s), (C)orrupted or not (c)
399 
400 void TFileCollection::Print(Option_t *option) const
401 {
402  TString opt(option);
403  TPMERegexp re("(^|;| )filter:([SsCc]+)( |;|$)", 4);
404  if (re.Match(option) == 4) {
405  TString showOnly = re[2];
406  PrintDetailed(showOnly);
407  return;
408  }
409 
410  Printf("TFileCollection %s - %s contains: %lld files with a size of"
411  " %lld bytes, %.1f %% staged - default tree name: '%s'",
414 
415  if (opt.Contains("M", TString::kIgnoreCase)) {
416  Printf("The files contain the following trees:");
417 
418  TIter metaDataIter(fMetaDataList);
419  TFileInfoMeta* metaData = 0;
420  while ((metaData = dynamic_cast<TFileInfoMeta*>(metaDataIter.Next()))) {
421  if (!metaData->IsTree())
422  continue;
423 
424  Printf("Tree %s: %lld events", metaData->GetName(), metaData->GetEntries());
425  }
426  }
427 
428  if (fList && opt.Contains("F", TString::kIgnoreCase)) {
429  Printf("The collection contains the following files:");
430  if (!opt.Contains("L") && !fDefaultTree.IsNull())
431  opt += TString::Format(" T:%s", fDefaultTree.Data());
432  fList->Print(opt);
433  }
434 }
435 
436 ////////////////////////////////////////////////////////////////////////////////
437 /// Print detailed.
438 
440 {
441  Bool_t bS, bs, bC, bc;
442  bS = bs = bC = bc = kFALSE;
443 
444  if (showOnly.Index('S') >= 0) bS = kTRUE;
445  if (showOnly.Index('s') >= 0) bs = kTRUE;
446  if (showOnly.Index('C') >= 0) bC = kTRUE;
447  if (showOnly.Index('c') >= 0) bc = kTRUE;
448 
449  // If Ss (or Cc) omitted, show both Ss (or Cc)
450  if (!bc && !bC) bc = bC = kTRUE;
451  if (!bs && !bS) bs = bS = kTRUE;
452 
453  TIter it(fList);
454  TFileInfo *info;
455  UInt_t countAll = 0;
456  UInt_t countMatch = 0;
457 
458  Printf("\033[1m #. SC | Entries | Size | URL\033[m");
459 
460  TString um;
461  Double_t sz;
462 
463  while ((info = dynamic_cast<TFileInfo *>(it.Next()))) {
464 
467 
468  TUrl *url;
469 
470  countAll++;
471 
472  if ( ((s && bS) || (!s && bs)) && ((c && bC) || (!c && bc)) ) {
473 
474  TFileInfoMeta *meta = info->GetMetaData(); // gets the first one
475  Int_t entries = -1;
476 
477  if (meta) entries = meta->GetEntries();
478 
479  FormatSize(info->GetSize(), um, sz);
480 
481  // First line: current URL with all information
482  info->ResetUrl();
483  TUrl *curUrl = info->GetCurrentUrl();
484  const char *curUrlStr = curUrl ? curUrl->GetUrl() : "n.a.";
485  Printf("\033[1m%4u.\033[m %c%c | %-7s | %6.1lf %s | %s",
486  ++countMatch,
487  (s ? 'S' : 's'), (c ? 'C' : 'c'),
488  ((entries > 0) ? Form("% 7d", entries) : "n.a."),
489  sz, um.Data(), curUrlStr);
490  info->NextUrl();
491 
492  // Every other URL shown below current one
493  while ((url = info->NextUrl())) {
494  Printf(" | | | %s", url->GetUrl());
495  }
496  info->ResetUrl();
497 
498  } // end match filters
499 
500  } // end loop over entries
501 
502  if (countAll) {
503 
504  Printf(">> There are \033[1m%u\033[m file(s) in dataset: "
505  "\033[1m%u (%5.1f%%)\033[m matched your criteria (%s)",
506  countAll, countMatch,
507  100.*(Float_t)countMatch/(Float_t)countAll, showOnly.Data());
508 
509  FormatSize(fTotalSize, um, sz);
510  Printf(">> Total size : \033[1m%.1f %s\033[m", sz, um.Data());
511  Printf(">> Staged (S) : \033[1m%5.1f %%\033[m", GetStagedPercentage());
512  Printf(">> Corrupted (C) : \033[1m%5.1f %%\033[m",
514 
515  }
516  else {
517  Printf(">> No files in dataset");
518  }
519 
520  const char *treeName = GetDefaultTreeName();
521  Printf(">> Default tree : \033[1m%s\033[m",
522  (treeName ? treeName : "(no default tree)"));
523 
524 }
525 
526 ////////////////////////////////////////////////////////////////////////////////
527 /// Format size.
528 
530  Double_t &size) const
531 {
532  static const char *ums[] = { "byt", "KiB", "MiB", "GiB", "TiB" };
533  Int_t maxDiv = sizeof(ums)/sizeof(const char *);
534  Int_t nDiv = 0;
535  Double_t b = bytes;
536 
537  while ((b >= 1024.) && (nDiv+1 < maxDiv)) {
538  b /= 1024.;
539  nDiv++;
540  }
541 
542  um = ums[nDiv];
543  size = b;
544 }
545 
546 ////////////////////////////////////////////////////////////////////////////////
547 /// Calls TUrl::SetAnchor() for all URLs contained in all TFileInfos.
548 
549 void TFileCollection::SetAnchor(const char *anchor)
550 {
551  if (!fList)
552  return;
553 
554  TIter iter(fList);
555  TFileInfo *fileInfo = 0;
556  while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next()))) {
557  fileInfo->ResetUrl();
558  TUrl *url = 0;
559  while ((url = fileInfo->NextUrl()))
560  url->SetAnchor(anchor);
561  fileInfo->ResetUrl();
562  }
563 }
564 
565 ////////////////////////////////////////////////////////////////////////////////
566 /// Set the bit for all TFileInfos
567 
569 {
570  if (!fList)
571  return;
572 
573  TIter iter(fList);
574  TFileInfo *fileInfo = 0;
575  while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
576  fileInfo->SetBit(f);
577 }
578 
579 ////////////////////////////////////////////////////////////////////////////////
580 /// Reset the bit for all TFileInfos
581 
583 {
584  if (!fList)
585  return;
586 
587  TIter iter(fList);
588  TFileInfo *fileInfo = 0;
589  while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
590  fileInfo->ResetBit(f);
591 }
592 
593 ////////////////////////////////////////////////////////////////////////////////
594 /// Returns the tree name set with SetDefaultTreeName(), if one was set;
595 /// otherwise returns the name of the first tree in the meta data list.
596 /// Returns 0 in case no trees are found in the meta data list.
597 
599 {
600  if (fDefaultTree.Length() > 0)
601  return fDefaultTree;
602 
603  TIter metaDataIter(fMetaDataList);
604  TFileInfoMeta *metaData = 0;
605  while ((metaData = dynamic_cast<TFileInfoMeta*>(metaDataIter.Next()))) {
606  if (!metaData->IsTree())
607  continue;
608  return metaData->GetName();
609  }
610  return 0;
611 }
612 
613 ////////////////////////////////////////////////////////////////////////////////
614 /// Returns the number of entries for the specified tree (retrieved from meta data).
615 /// If tree is not specified, use the default tree name.
616 /// Returns -1 in case the specified tree is not found.
617 
619 {
620  if (!tree || !*tree) {
621  tree = GetDefaultTreeName();
622  if (!tree)
623  return -1;
624  }
625 
626  TFileInfoMeta *metaData = dynamic_cast<TFileInfoMeta*>(fMetaDataList->FindObject(tree));
627  if (!metaData)
628  return -1;
629 
630  return metaData->GetEntries();
631 }
632 
633 ////////////////////////////////////////////////////////////////////////////////
634 /// Returns the meta data object with the specified meta name.
635 /// Returns 0 in case specified meta data is not found.
636 
638 {
639  if (!meta || !*meta)
640  return 0;
641 
642  return dynamic_cast<TFileInfoMeta*>(fMetaDataList->FindObject(meta));
643 }
644 
645 ////////////////////////////////////////////////////////////////////////////////
646 /// Moves the indicated meta data in the first position, so that
647 /// it becomes effectively the default.
648 
650 {
651  TFileInfoMeta *fim = GetMetaData(meta);
652  if (fim) {
653  fMetaDataList->Remove(fim);
654  fMetaDataList->AddFirst(fim);
655  }
656 }
657 
658 ////////////////////////////////////////////////////////////////////////////////
659 /// Removes the indicated meta data object in all TFileInfos and this object
660 /// If no name is given all metadata is removed
661 
662 void TFileCollection::RemoveMetaData(const char *meta)
663 {
664  if (fList) {
665  TIter iter(fList);
666  TFileInfo *fileInfo = 0;
667  while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
668  fileInfo->RemoveMetaData(meta);
669  }
670 
671  if (meta) {
672  TObject* obj = fMetaDataList->FindObject("meta");
673  if (obj) {
674  fMetaDataList->Remove(obj);
675  delete obj;
676  }
677  } else
678  fMetaDataList->Clear();
679 }
680 
681 ////////////////////////////////////////////////////////////////////////////////
682 /// Sort the collection.
683 
685 {
686  if (!fList)
687  return;
688 
689  // Make sure the relevant bit has the wanted value
690  if (useindex) {
692  } else {
694  }
695 
696  fList->Sort();
697 }
698 
699 ////////////////////////////////////////////////////////////////////////////////
700 /// Export the relevant info as a string; use 'name' as collection name,
701 /// if defined, else use GetName().
702 /// The output object must be destroyed by the caller
703 
705 {
706  TString treeInfo;
707  if (GetDefaultTreeName()) {
709  if (popt == 1) {
710  treeInfo = GetDefaultTreeName();
711  if (meta)
712  treeInfo += TString::Format(", %lld entries", meta->GetEntries());
713  TFileInfoMeta *frac = GetMetaData("/FractionOfTotal");
714  if (frac)
715  treeInfo += TString::Format(", %3.1f %% of total", frac->GetEntries() / 10.);
716  } else {
717  treeInfo.Form(" %s ", GetDefaultTreeName());
718  if (treeInfo.Length() > 14) treeInfo.Replace(13, 1, '>');
719  treeInfo.Resize(14);
720  if (meta) {
721  if (meta->GetEntries() > 99999999) {
722  treeInfo += TString::Format("| %8lld ", meta->GetEntries());
723  } else {
724  treeInfo += TString::Format("| %8.4g ", (Double_t) meta->GetEntries());
725  }
726  }
727  }
728  } else {
729  treeInfo = " N/A";
730  }
731  if (popt == 0) treeInfo.Resize(25);
732 
733  // Renormalize the size to kB, MB or GB
734  const char *unit[4] = {"kB", "MB", "GB", "TB"};
735  Int_t k = 0;
736  Long64_t refsz = 1024;
737  Long64_t xsz = (Long64_t) (GetTotalSize() / refsz);
738  while (xsz > 1024 && k < 3) {
739  k++;
740  refsz *= 1024;
741  xsz = (Long64_t) (GetTotalSize() / refsz);
742  }
743 
744  // The name
745  TString dsname(name);
746  if (dsname.IsNull()) dsname = GetName();
747 
748  // Create the output string
749  TObjString *outs = 0;
750  if (popt == 1) {
751  outs = new TObjString(Form("%s %lld files, %lld %s, staged %d %%, tree: %s", dsname.Data(),
752  GetNFiles(), xsz, unit[k],
753  (Int_t)GetStagedPercentage(), treeInfo.Data()));
754  } else {
755  outs = new TObjString(Form("%s| %7lld |%s| %5lld %s | %3d %%", dsname.Data(),
756  GetNFiles(), treeInfo.Data(), xsz, unit[k],
758  }
759  // Done
760  return outs;
761 }
762 
763 ////////////////////////////////////////////////////////////////////////////////
764 /// Return the subset of files served by 'server'. The syntax for 'server' is
765 /// the standard URI one, i.e. `[<scheme>://]<host>[:port]`
766 
768 {
770 
771  // Server specification is mandatory
772  if (!server || strlen(server) <= 0) {
773  Info("GetFilesOnServer", "server undefined - do nothing");
774  return fc;
775  }
776 
777  // Nothing to do for empty lists
778  if (!fList || fList->GetSize() <= 0) {
779  Info("GetFilesOnServer", "the list is empty - do nothing");
780  return fc;
781  }
782 
783  // Define the server reference string
784  TUri uri(server);
785  TString srv, scheme("root"), port;
786  if (uri.GetScheme() != "") scheme = uri.GetScheme();
787  if (uri.GetPort() != "") port.Form(":%s", uri.GetPort().Data());
788  srv.Form("%s://%s%s", scheme.Data(), TUrl(server).GetHostFQDN(), port.Data());
789  if (gDebug > 0)
790  Info("GetFilesOnServer", "searching for files on server: '%s' (input: '%s')",
791  srv.Data(), server);
792 
793  // Prepare the output
794  fc = new TFileCollection(GetName());
795  TString title;
796  if (GetTitle() && strlen(GetTitle()) > 0) {
797  title.Form("%s (subset on server %s)", GetTitle(), srv.Data());
798  } else {
799  title.Form("subset of '%s' on server %s", GetName(), srv.Data());
800  }
801  fc->SetTitle(title.Data());
802  // The default tree name
803  fc->SetDefaultTreeName(GetDefaultTreeName());
804 
805  // We look for URL starting with srv
806  srv.Insert(0, "^");
807 
808  // Go through the list
809  TIter nxf(fList);
810  TFileInfo *fi = 0;
811  while ((fi = (TFileInfo *)nxf())) {
812  TUrl *xu = 0;
813  if ((xu = fi->FindByUrl(srv.Data()))) {
814  // Create a new TFileInfo object
815  TFileInfo *nfi = new TFileInfo(xu->GetUrl(), fi->GetSize(),
816  fi->GetUUID() ? fi->GetUUID()->AsString() : 0,
817  fi->GetMD5() ? fi->GetMD5()->AsString() : 0);
818  if (fi->GetMetaDataList()) {
819  TIter nxm(fi->GetMetaDataList());
820  TFileInfoMeta *md = 0;
821  while ((md = (TFileInfoMeta *) nxm())) {
822  nfi->AddMetaData(new TFileInfoMeta(*md));
823  }
824  }
827  if (gDebug > 1)
828  Info("GetFilesOnServer", "adding: %s", xu->GetUrl());
829  fc->Add(nfi);
830  }
831  }
832 
833  // If nothing found, delete the object
834  if (fc->GetList()->GetSize() <= 0) {
835  delete fc;
836  fc = 0;
837  Info("GetFilesOnServer", "dataset '%s' has no files on server: '%s' (searched for: '%s')",
838  GetName(), server, srv.Data());
839  }
840 
841  // Fill up sums on the sub file collection
842  if (fc) {
843  fc->Update();
844  // Fraction of total in permille
845  Long64_t xf = (fc->GetTotalSize() * 1000) / GetTotalSize();
846  TFileInfoMeta *m = new TFileInfoMeta("FractionOfTotal", "External Info", xf);
848  fc->AddMetaData(m);
849  }
850 
851  // Done
852  return fc;
853 }
854 
855 ////////////////////////////////////////////////////////////////////////////////
856 /// Return a map of TFileCollections with the files on each data server,
857 /// excluding servers in the comma-separated list 'exclude'.
858 /// If curronly is kTRUE, only the URL flagged as current in each TFileInfo
859 /// is considered.
860 
861 TMap *TFileCollection::GetFilesPerServer(const char *exclude, Bool_t curronly)
862 {
863  TMap *dsmap = 0;
864 
865  // Nothing to do for empty lists
866  if (!fList || fList->GetSize() <= 0) {
867  Info("GetFilesPerServer", "the list is empty - do nothing");
868  return dsmap;
869  }
870 
871  // List of servers to be ignored
872  THashList *excl = 0;
873  if (exclude && strlen(exclude) > 0) {
874  excl = new THashList;
875  excl->SetOwner();
876  TUri uri;
877  TString srvs(exclude), s, srv, scheme, port;
878  Int_t from = 0;
879  while (srvs.Tokenize(s, from, ",")) {
880  uri.SetUri(s.Data());
881  scheme = "root";
882  port = "";
883  if (uri.GetScheme() != "") scheme = uri.GetScheme();
884  if (uri.GetPort() != "") port.Form(":%s", uri.GetPort().Data());
885  srv.Form("%s://%s%s", scheme.Data(), TUrl(s.Data()).GetHostFQDN(), port.Data());
886  // Add
887  excl->Add(new TObjString(srv.Data()));
888  }
889  }
890 
891  // Prepare the output
892  dsmap = new TMap();
893 
894  // Go through the list
895  TIter nxf(fList);
896  TFileInfo *fi = 0;
897  TUri uri;
898  TString key;
899  TFileCollection *fc = 0;
900  while ((fi = (TFileInfo *)nxf())) {
901  // Save current URL
902  TUrl *curl = fi->GetCurrentUrl();
903  // Loop over URLs
904  if (!curronly) fi->ResetUrl();
905  TUrl *xurl = 0;
906  while ((xurl = (curronly) ? curl : fi->NextUrl())) {
907  // Find the key for this server
908  key.Form("%s://%s", xurl->GetProtocol(), xurl->GetHostFQDN());
909  // Check if this has to be ignored
910  if (excl && excl->FindObject(key.Data())) {
911  if (curronly) break;
912  continue;
913  } else if (excl && xurl->GetPort() > 0) {
914  // Complete the key, if needed, and recheck
915  key += TString::Format(":%d", xurl->GetPort());
916  if (excl->FindObject(key.Data())) {
917  if (curronly) break;
918  continue;
919  }
920  }
921  // Get the map entry for this key
922  TPair *ent = 0;
923  if (!(ent = (TPair *) dsmap->FindObject(key.Data()))) {
924  // Create the TFileCollection
925  fc = new TFileCollection(GetName());
926  TString title;
927  if (GetTitle() && strlen(GetTitle()) > 0) {
928  title.Form("%s (subset on server %s)", GetTitle(), key.Data());
929  } else {
930  title.Form("subset of '%s' on server %s", GetName(), key.Data());
931  }
932  fc->SetTitle(title.Data());
933  // The default tree name
935  // Add it to the map
936  dsmap->Add(new TObjString(key.Data()), fc);
937  // Notify
938  if (gDebug > 0)
939  Info("GetFilesPerServer", "found server: '%s' (fc: %p)", key.Data(), fc);
940  } else {
941  // Attach to the TFileCollection
942  fc = (TFileCollection *) ent->Value();
943  }
944  // Create a new TFileInfo object
945  TFileInfo *nfi = new TFileInfo(xurl->GetUrl(kTRUE), fi->GetSize(),
946  fi->GetUUID() ? fi->GetUUID()->AsString() : 0,
947  fi->GetMD5() ? fi->GetMD5()->AsString() : 0);
948  if (fi->GetMetaDataList()) {
949  TIter nxm(fi->GetMetaDataList());
950  TFileInfoMeta *md = 0;
951  while ((md = (TFileInfoMeta *) nxm())) {
952  nfi->AddMetaData(new TFileInfoMeta(*md));
953  }
954  }
957  fc->Add(nfi);
958  // In current_only mode we are done
959  if (curronly) break;
960  }
961  // Restore current URL
962  fi->SetCurrentUrl(curl);
963  }
964 
965  // Fill up sums on the sub file collections
966  TIter nxk(dsmap);
967  TObject *k = 0;
968  while ((k = nxk()) && (fc = (TFileCollection *) dsmap->GetValue(k))) {
969  fc->Update();
970  // Fraction of total in permille
971  Long64_t xf = (fc->GetTotalSize() * 1000) / GetTotalSize();
972  TFileInfoMeta *m = new TFileInfoMeta("FractionOfTotal", "External Info", xf);
974  fc->AddMetaData(m);
975  }
976 
977  // Cleanup
978  if (excl) delete excl;
979 
980  // Done
981  return dsmap;
982 }
983 
984 ////////////////////////////////////////////////////////////////////////////////
985 /// Adds a meta data object to the file collection object. The object will be
986 /// adopted by the TFileCollection and should not be deleted by the user.
987 /// Typically objects of class TFileInfoMeta or derivatives should be added,
988 /// but any class is accepted.
989 ///
990 /// NB : a call to TFileCollection::Update will remove these objects unless the
991 /// bit TFileInfoMeta::kExternal is set.
992 /// Returns kTRUE if successful, kFALSE otherwise.
993 
995 {
996  if (meta) {
997  if (!fMetaDataList) {
998  fMetaDataList = new TList;
1000  }
1001  fMetaDataList->Add(meta);
1002  return kTRUE;
1003  }
1004  return kFALSE;
1005 }
virtual const char * BaseName(const char *pathname)
Base name of a file name. Base name of /user/root is root.
Definition: TSystem.cxx:932
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
TUUID * GetUUID() const
Definition: TFileInfo.h:80
TFileInfoMeta * GetMetaData(const char *meta=0) const
Returns the meta data object with the specified meta name.
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition: TObject.cxx:854
TFileCollection * GetFilesOnServer(const char *server)
Return the subset of files served by &#39;server&#39;.
const char * GetDefaultTreeName() const
Returns the tree set with SetDefaultTreeName if set Returns the name of the first tree in the meta da...
long long Long64_t
Definition: RtypesCore.h:69
void SetAnchor(const char *anchor)
Calls TUrl::SetAnchor() for all URLs contained in all TFileInfos.
auto * m
Definition: textangle.C:8
TLine * line
Collectable string class.
Definition: TObjString.h:28
float Float_t
Definition: RtypesCore.h:53
const char Option_t
Definition: RtypesCore.h:62
Long64_t GetTotalSize() const
const Ssiz_t kNPOS
Definition: RtypesCore.h:111
This class represents a WWW compatible URL.
Definition: TUrl.h:35
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:687
int GetPathInfo(const char *path, Long_t *id, Long_t *size, Long_t *flags, Long_t *modtime)
Get info about a file: id, size, flags, modification time.
Definition: TSystem.cxx:1374
TUrl * GetCurrentUrl() const
Return the current url.
Definition: TFileInfo.cxx:248
const char * GetProtocol() const
Definition: TUrl.h:67
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
Float_t GetStagedPercentage() const
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
TUrl * NextUrl()
Iterator function, start iteration by calling ResetUrl().
Definition: TFileInfo.cxx:260
void SetEntries(Long64_t entries)
Definition: TFileInfo.h:146
Long64_t GetTotalEntries(const char *tree) const
Returns the number of entries for the specified tree (retrieved from meta data).
virtual Int_t GetEntries() const
Definition: TCollection.h:177
const TString GetPort() const
Definition: TUri.h:86
virtual void AddFirst(TObject *obj)
Add object at the beginning of the list.
Definition: TList.cxx:97
Regular expression class.
Definition: TRegexp.h:31
void Add(TObject *obj)
This function may not be used (but we need to provide it since it is a pure virtual in TCollection)...
Definition: TMap.cxx:53
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
R__ALWAYS_INLINE Bool_t TestBit(UInt_t f) const
Definition: TObject.h:172
const char * AsString() const
Return message digest as string.
Definition: TMD5.cxx:220
Basic string class.
Definition: TString.h:131
#define f(i)
Definition: RSha256.hxx:104
int Int_t
Definition: RtypesCore.h:41
virtual const char * DirName(const char *pathname)
Return the directory name in pathname.
Definition: TSystem.cxx:1004
bool Bool_t
Definition: RtypesCore.h:59
This class represents a RFC 3986 compatible URI.
Definition: TUri.h:35
void SetIndex(Int_t idx)
Definition: TFileInfo.h:99
TObject * FindObject(const char *name) const
Find object using its name.
Definition: THashList.cxx:262
TString & Insert(Ssiz_t pos, const char *s)
Definition: TString.h:644
Int_t GetIndex() const
Definition: TFileInfo.h:98
const char * GetHostFQDN() const
Return fully qualified domain name of url host.
Definition: TUrl.cxx:469
Bool_t R_ISREG(Int_t mode)
Definition: TSystem.h:119
void SetBit(UInt_t f, Bool_t set)
Set or unset the user status bits as specified in f.
Definition: TObject.cxx:694
const char * GetUrl(Bool_t withDeflt=kFALSE) const
Return full URL.
Definition: TUrl.cxx:387
virtual TObject * FindObject(const char *name) const
Find an object in this list using its name.
Definition: TList.cxx:574
Long64_t GetEntries() const
Definition: TFileInfo.h:139
TString & Replace(Ssiz_t pos, Ssiz_t n, const char *s)
Definition: TString.h:677
Int_t fMode
Definition: TSystem.h:128
virtual void Sort(Bool_t order=kSortAscending)
Sort linked list.
Definition: TList.cxx:933
Long64_t fNStagedFiles
static struct mg_connection * fc(struct mg_context *ctx)
Definition: civetweb.c:3352
Int_t Update(Long64_t avgsize=-1)
Update accumulated information about the elements of the collection (e.g.
Bool_t SetUri(const TString &uri)
Parse URI and set the member variables accordingly, returns kTRUE if URI validates, and kFALSE otherwise: URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] hier-part = "//" authority path-abempty / path-absolute / path-rootless / path-empty.
Definition: TUri.cxx:601
virtual const char * GetDirEntry(void *dirp)
Get a directory entry. Returns 0 if no more entries.
Definition: TSystem.cxx:851
virtual const char * ClassName() const
Returns name of class to which the object belongs.
Definition: TObject.cxx:128
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString...
Definition: TString.cxx:2286
Bool_t AddMetaData(TObject *meta)
Adds a meta data object to the file info object.
Definition: TFileInfo.cxx:384
THashList implements a hybrid collection class consisting of a hash table and a list to store TObject...
Definition: THashList.h:34
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:29
TFileCollection * GetStagedSubset()
Creates a subset of the files that have the kStaged & !kCorrupted bit set.
Bool_t SetCurrentUrl(const char *url)
Set 'url' as current URL, if in the list. Return kFALSE if not in the list.
Definition: TFileInfo.cxx:354
void RemoveMetaData(const char *meta=0)
Removes the indicated meta data object in all TFileInfos and this object. If no name is given all meta...
TUrl * FindByUrl(const char *url, Bool_t withDeflt=kFALSE)
Find an element from a URL. Returns 0 if not found.
Definition: TFileInfo.cxx:276
TObject * Value() const
Definition: TMap.h:121
Long64_t Merge(TCollection *list)
Merge all TFileCollection objects in list into this TFileCollection object.
A doubly linked list.
Definition: TList.h:44
TFileInfoMeta * GetMetaData(const char *meta=0) const
Get meta data object with specified name.
Definition: TFileInfo.cxx:424
void Sort(Bool_t useindex=kFALSE)
Sort the collection.
Bool_t AddMetaData(TObject *meta)
Adds a meta data object to the file collection object.
TObjString * ExportInfo(const char *name=0, Int_t popt=0)
Export the relevant info as a string; use 'name' as collection name, if defined, else use GetName()...
R__EXTERN TSystem * gSystem
Definition: TSystem.h:540
Int_t RemoveDuplicates()
Remove duplicates based on the UUID, typically after a verification.
THashList * GetList()
Int_t Add(TFileInfo *info)
Add TFileInfo to the collection.
Long64_t GetNFiles() const
void Print(Option_t *option="") const
Prints the contents of the TFileCollection.
virtual TObject * Remove(TObject *obj)
Remove object from the list.
Definition: TList.cxx:818
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:610
TObject * Next()
Definition: TCollection.h:249
TObject * Remove(TObject *obj)
Remove object from the list.
Definition: THashList.cxx:378
Collection abstract base class.
Definition: TCollection.h:63
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2264
unsigned int UInt_t
Definition: RtypesCore.h:42
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:880
char * Form(const char *fmt,...)
Ssiz_t Length() const
Definition: TString.h:405
void FormatSize(Long64_t bytes, TString &um, Double_t &size) const
Format size.
virtual void FreeDirectory(void *dirp)
Free a directory.
Definition: TSystem.cxx:843
#define Printf
Definition: TGeoToOCC.h:18
THashList * fList
const Bool_t kFALSE
Definition: RtypesCore.h:88
TFileCollection(const TFileCollection &)
Class used by TMap to store (key,value) pairs.
Definition: TMap.h:102
void SetAnchor(const char *anchor)
Definition: TUrl.h:89
virtual Bool_t IsEmpty() const
Definition: TCollection.h:186
void SetDefaultTreeName(const char *treeName)
#define ClassImp(name)
Definition: Rtypes.h:359
double Double_t
Definition: RtypesCore.h:55
Bool_t IsTree() const
Definition: TFileInfo.h:142
TMap implements an associative array of (key,value) pairs using a THashTable for efficient retrieval ...
Definition: TMap.h:40
int nentries
Definition: THbookFile.cxx:89
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
TMD5 * GetMD5() const
Definition: TFileInfo.h:81
static constexpr double s
virtual void Clear(Option_t *option="")
Remove all objects from the list.
Definition: TList.cxx:399
void ResetUrl()
Definition: TFileInfo.h:68
Int_t GetPort() const
Definition: TUrl.h:81
static constexpr double um
const char * AsString() const
Return UUID as string. Copy string immediately since it will be reused.
Definition: TUUID.cxx:533
Bool_t IsNull() const
Definition: TString.h:402
Int_t Match(const TString &s, UInt_t start=0)
Runs a match on s against the regex 'this' was created with.
Definition: TPRegexp.cxx:708
Mother of all ROOT objects.
Definition: TObject.h:37
std::istream & ReadToDelim(std::istream &str, char delim='\n')
Read up to an EOF, or a delimiting character, whichever comes first.
Definition: Stringio.cxx:89
Int_t AddFromFile(const char *file, Int_t nfiles=-1, Int_t firstfile=1)
Add file names contained in the specified text file.
void SetDefaultMetaData(const char *meta)
Moves the indicated meta data to the first position, so that it becomes effectively the default...
TObject * FindObject(const char *keyname) const
Check if a (key,value) pair exists with keyname as name of the key.
Definition: TMap.cxx:214
void PrintDetailed(TString &showOnly) const
Print detailed.
Bool_t RemoveMetaData(const char *meta=0)
Remove the metadata object.
Definition: TFileInfo.cxx:401
Long64_t fNCorruptFiles
virtual void Add(TObject *obj)
Definition: TList.h:87
virtual ~TFileCollection()
Cleanup.
Wrapper for PCRE library (Perl Compatible Regular Expressions).
Definition: TPRegexp.h:97
Class that contains a list of TFileInfo's and accumulated meta data information about its entries...
void ResetBitAll(UInt_t f)
Reset the bit for all TFileInfos.
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
Float_t GetCorruptedPercentage() const
virtual void * OpenDirectory(const char *name)
Open a directory. Returns 0 if directory does not exist.
Definition: TSystem.cxx:834
R__EXTERN Int_t gDebug
Definition: Rtypes.h:86
#define c(i)
Definition: RSha256.hxx:101
TObject * GetValue(const char *keyname) const
Returns a pointer to the value associated with keyname as name of the key.
Definition: TMap.cxx:235
Definition: tree.py:1
Class describing a generic file including meta information.
Definition: TFileInfo.h:38
void ResetBit(UInt_t f)
Definition: TObject.h:171
virtual Bool_t ExpandPathName(TString &path)
Expand a pathname getting rid of special shell characters like ~.
Definition: TSystem.cxx:1254
virtual void Print(Option_t *option="") const
Default print for collections, calls Print(option, 1).
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Definition: TCollection.h:182
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
Bool_t IsWhitespace() const
Definition: TString.h:403
TMap * GetFilesPerServer(const char *exclude=0, Bool_t curronly=kFALSE)
Return a map of TFileCollections with the files on each data server, excluding servers in the comma-s...
const Bool_t kTRUE
Definition: RtypesCore.h:87
Long64_t GetSize() const
Definition: TFileInfo.h:79
void SetBitAll(UInt_t f)
Set the bit for all TFileInfos.
char name[80]
Definition: TGX11.cxx:109
TList * GetMetaDataList() const
Definition: TFileInfo.h:82
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:866
void Resize(Ssiz_t n)
Resize the string. Truncate or add blanks as necessary.
Definition: TString.cxx:1070
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:48
const TString GetScheme() const
Definition: TUri.h:80
const char * Data() const
Definition: TString.h:364