32 fBasePath(basePath), fFileName(fileName), fTreeName(treeName),
33 fRegexpRaw(regexp), fAnchor(anchor), fQuery(query), fArchSubst(archSubst),
34 fRegexp(0), fSearchId(
""), fGridResult(0)
100 Info(
"GetGridResult",
"Returning cached AliEn find results");
104 Info(
"GetGridResult",
"Querying AliEn file catalog");
111 if (!
gGrid)
return NULL;
115 Info(
"GetGridResult",
"AliEn find %s %s [regexp=%s] [archsubst=%d]",
120 if (!fGridResult)
return NULL;
139 TIter it(fGridResult);
144 while (( map = dynamic_cast<TMap *>(it.
Next()) ) != NULL) {
152 TObject *exmap = fGridResult->Remove(map);
153 if (exmap)
delete exmap;
179 if (reArchSubst)
delete reArchSubst;
191 searchIdStr.
Form(
"BasePath=%s FileName=%s Anchor=%s ArchSubst=%d " 192 "TreeName=%s Regexp=%s",
217 for (
Int_t i=0; i<nEntries; i++) {
224 Info(
"GetCollection",
">> %s", tUrl.
Data());
247 Printf(
"BasePath=%s FileName=%s Anchor=%s ArchSubst=%d " 248 "TreeName=%s Regexp=%s (query %s a result)",
344 kfNoopRedirUrl =
new TUrl(
"noop://redir");
345 kfNoopUnknownUrl =
new TUrl(
"noop://unknown");
346 kfNoopNoneUrl =
new TUrl(
"noop://none");
348 fCacheExpire_s = cacheExpire_s;
352 if (fUrlTpl.Contains(
"<path>")) {
362 dsDirFmt.
Form(
"dir:%s perms:open", cacheDir.
Data());
366 Error(
"Init",
"Cannot initialize cache on directory %s", cacheDir.
Data());
376 Info(
"TDataSetManagerAliEn",
"Caching on %s", cacheDir.
Data());
377 Info(
"TDataSetManagerAliEn",
"URL schema: %s", urlTpl.
Data());
378 Info(
"TDataSetManagerAliEn",
"Cache expires after: %lus", cacheExpire_s);
385 const char *urlTpl,
ULong_t cacheExpire_s)
387 kfNoopRedirUrl(0), kfNoopUnknownUrl(0), kfNoopNoneUrl(0)
389 Init(cacheDir, urlTpl, cacheExpire_s);
399 TPMERegexp reCache(
"(^| )cache:([^ ]+)( |$)");
400 if (reCache.
Match(cfgStr) != 4) {
401 Error(
"TDataSetManagerAliEn",
"No cache directory specified");
406 TPMERegexp reUrlTpl(
"(^| )urltemplate:([^ ]+)( |$)");
407 if (reUrlTpl.
Match(cfgStr) != 4) {
408 Error(
"TDataSetManagerAliEn",
"No local URL template specified");
413 TPMERegexp reCacheExpire(
"(^| )cacheexpiresecs:([0-9]+)( |$)");
414 if (reCacheExpire.
Match(cfgStr) != 4) {
415 Error(
"TDataSetManagerAliEn",
"No cache expiration set");
420 Init(reCache[2], reUrlTpl[2], (
ULong_t)reCacheExpire[2].Atoll());
441 if (reKind.
Match(uri) != 2) {
442 Error(
"GetFindCommandsFromUri",
"Data, Sim or Find not specified");
447 TPMERegexp reMode(
"(^|;)Mode=([A-Za-z]+)(;|$)");
448 if (reMode.
Match(uri) != 4) {
459 Error(
"GetFindCommandsFromUri",
460 "Wrong analysis mode specified: use one of: Mode=remote, local, cache");
465 TList *findCommands = NULL;
467 if (reKind[1].BeginsWith(
"Find")) {
479 Error(
"GetFindCommandsFromUri",
"Malformed AliEn find command");
483 findCommands =
new TList();
486 treeName, regexp, query) );
490 Bool_t sim = (reKind[1][0] ==
'S');
493 std::vector<Int_t> *runList;
500 Error(
"GetFindCommandsFromUri",
"Invalid parameters");
505 findCommands =
new TList();
526 basePathRun.
Form(
"/alice/sim/%s", lhcPeriod.
Data());
528 basePathRun.
Form(
"/alice/sim/%d/%s", year, lhcPeriod.
Data());
534 if ((pass[0] >=
'0') && (pass[0] <=
'9')) pass.
Prepend(
"pass");
535 basePathRun.
Form(
"/alice/data/%d/%s", year, lhcPeriod.
Data());
540 std::vector<Int_t> validRuns;
543 if (!validRunDirs)
return NULL;
545 TIter nrd(validRunDirs);
548 validRuns.resize( (
size_t)(validRunDirs->
GetEntries()) );
550 while (( dir = dynamic_cast<TMap *>(nrd()) ) != NULL) {
554 if (run > 0) validRuns.push_back(run);
558 for (
UInt_t i=0; i<runList->size(); i++) {
562 for (
UInt_t j=0; j<validRuns.size(); j++) {
563 if (validRuns[j] == (*runList)[i]) {
570 Warning(
"TDataSetManagerAliEn::GetFindCommandsFromUri",
571 "Avoiding unnecessary find on run %d: not found", (*runList)[i]);
576 Info(
"TDataSetManagerAliEn::GetFindCommandsFromUri",
"Run found: %d", (*runList)[i]);
580 TString basePath, fileName, temp;
584 temp.
Form(
"/%06d", runList->at(i));
585 basePath = basePathRun + temp;
588 temp.
Form(
"/AOD%03d", aodNum);
594 temp.
Form(
"/%09d/ESDs/%s", runList->at(i), pass.
Data());
595 basePath = basePathRun + temp;
600 temp.
Form(
"/AOD%03d", aodNum);
609 fileName =
"AliESDs.root";
610 treeName =
"/esdTree";
613 fileName =
"AliAOD.root";
614 treeName =
"/aodTree";
627 TPMERegexp reForceUpdate(
"(^|;)ForceUpdate(;|$)");
628 forceUpdate = (reForceUpdate.
Match(uri) == 3);
645 TPMERegexp reMode(
"(^|;)(Mode=[A-Za-z]+)(;|$)");
646 if (reMode.Match(uri) == 4)
648 TPMERegexp reForceUpdate(
"(^|;)(ForceUpdate)(;|$)");
649 if (reForceUpdate.
Match(uri) == 4)
653 TPMERegexp reBasePath(
"(^|;)(BasePath=([^; ]+))(;|$)");
654 if (reBasePath.
Match(uri) != 5) {
655 ::Error(
"TDataSetManagerAliEn::ParseCustomFindUri",
656 "Base path not specified");
660 basePath = reBasePath[3];
663 TPMERegexp reFileName(
"(^|;)(FileName=([^; ]+))(;|$)");
664 if (reFileName.
Match(uri) != 5) {
665 ::Error(
"TDataSetManagerAliEn::ParseCustomFindUri",
666 "File name not specified");
670 fileName = reFileName[3];
673 TPMERegexp reAnchor(
"(^|;)(Anchor=([^; ]+))(;|$)");
674 if (reAnchor.
Match(uri) != 5)
678 anchor = reAnchor[3];
682 TPMERegexp reQuery(
"(^|;)(Query=([^; ]+))(;|$)");
683 if (reQuery.
Match(uri) != 5)
691 TPMERegexp reTreeName(
"(^|;)(Tree=(/[^; ]+))(;|$)");
692 if (reTreeName.
Match(uri) != 5)
696 treeName = reTreeName[3];
700 TPMERegexp reRegexp(
"(^|;)(Regexp=([^; ]+))(;|$)");
701 if (reRegexp.
Match(uri) != 5)
705 regexp = reRegexp[3];
712 ::Error(
"TDataSetManagerAliEn::ParseCustomFindUri",
713 "There are unrecognized parameters in the dataset find string");
731 checkUri = uri(idx, uri.
Length());
735 TPMERegexp reMode(
"(^|;)(Mode=[A-Za-z]+)(;|$)");
736 if (reMode.Match(uri) == 4)
738 TPMERegexp reForceUpdate(
"(^|;)(ForceUpdate)(;|$)");
739 if (reForceUpdate.
Match(uri) == 4)
746 TPMERegexp rePeriod(
"(^|;)(Period=(LHC([0-9]{2})[^;]*))(;|$)");
747 if (rePeriod.
Match(uri) != 6) {
748 ::Error(
"TDataSetManagerAliEn::ParseOfficialDataUri",
749 "LHC period not specified (e.g. Period=LHC10h)");
754 period = rePeriod[3];
755 year = rePeriod[4].Atoi() + 2000;
761 TPMERegexp reFormat(
"(^|;)(Variant=(ESDs?|AOD([0-9]{3})))(;|$)");
762 if (reFormat.
Match(uri) != 6) {
763 ::Error(
"TDataSetManagerAliEn::ParseOfficialDataUri",
764 "Data variant (e.g., Variant=ESD or AOD079) not specified");
769 if (reFormat[3].BeginsWith(
"ESD")) esd =
kTRUE;
772 aodNum = reFormat[4].Atoi();
779 TPMERegexp rePass(
"(^|;)(Pass=([a-zA-Z_0-9-]+))(;|$)");
780 if ((!sim) && (rePass.
Match(uri) != 5)) {
781 ::Error(
"TDataSetManagerAliEn::ParseOfficialDataUri",
782 "Pass (e.g., Pass=cpass1_muon) is mandatory on real data");
792 TPMERegexp reRun(
"(^|;)(Run=([0-9,-]+))(;|$)");
793 if (reRun.
Match(uri) != 5) {
794 ::Error(
"TDataSetManagerAliEn::ParseOfficialDataUri",
795 "Run or run range not specified (e.g., Run=139104-139107,139306)");
806 ::Error(
"TDataSetManagerAliEn::ParseOfficialDataUri",
807 "There are unrecognized parameters in dataset string");
831 ::Error(
"TDataSetManagerAliEn::AliEnWhereIs",
"input AliEn URL not given!");
836 ::Error(
"TDataSetManagerAliEn::AliEnWhereIs",
"no AliEn grid connection available!");
846 ::Error(
"TDataSetManagerAliEn::AliEnWhereIs",
"cannot get response from AliEn");
852 TString se, pfnUrl, validPfnUrl;
853 while ( (pfn = dynamic_cast<TMap *>( nextPfn() )) != NULL ) {
864 validPfnUrl = pfnUrl;
867 else if (!onlyFromCloseSE && validPfnUrl.
IsNull()) {
868 validPfnUrl = pfnUrl;
884 if (validPfnUrl.
IsNull()) {
886 ::Error(
"TDataSetManagerAliEn::AliEnWhereIs",
"cannot find endpoint URL for %s", alienUrl->
GetUrl());
897 options.Append(
"&zip=");
898 options.Append(anchor);
909 std::vector<Int_t> *runNumsPtr =
new std::vector<Int_t>();
910 std::vector<Int_t> &runNums = *runNumsPtr;
917 while ( (runOs = dynamic_cast<TObjString *>(run.
Next())) ) {
922 if (p.
Match(runStr) == 3) {
923 Int_t r1 = p[1].Atoi();
924 Int_t r2 = p[2].Atoi();
934 runNums.push_back(
r);
938 runNums.push_back(runStr.
Atoi());
945 for (
UInt_t i=0; i<runNums.size(); i++) {
946 for (
UInt_t j=i+1; j<runNums.size(); j++) {
947 if (runNums[j] < runNums[i]) {
948 runNums[i] = runNums[i] ^ runNums[j];
949 runNums[j] = runNums[i] ^ runNums[j];
950 runNums[i] = runNums[i] ^ runNums[j];
957 std::vector<Int_t>::iterator itr = runNums.begin();
959 while (itr != runNums.end()) {
960 if ((itr == runNums.begin()) || (prevVal != *itr)) {
965 itr = runNums.erase(itr);
984 if (!findCmds)
return NULL;
993 while ((af = dynamic_cast<TAliEnFind *>(it.
Next())) != NULL) {
1003 now = now/1000 + 788914800;
1007 Info(
"GetDataSet",
"Ignoring cached query result: forcing update");
1011 Info(
"GetDataSet",
"Dataset cache has expired");
1015 Info(
"GetDataSet",
"Getting file collection from cache");
1022 Info(
"GetDataSet",
"Getting file collection from AliEn");
1026 Error(
"GetDataSet",
"Cannot get collection from AliEn");
1040 while ((fi = dynamic_cast<TFileInfo *>(itCache.Next()))) {
1069 fillLocality =
kTRUE;
1075 Info(
"GetDataSet",
"Not caching data locality information now");
1077 while ((fi = dynamic_cast<TFileInfo *>(itCache.Next())))
1082 saveToCache =
kTRUE;
1094 Info(
"GetDataSet",
"No dataset locality information in cache");
1104 while ((fi = dynamic_cast<TFileInfo *>(itCache.Next()))) {
1108 fillLocality =
kTRUE;
1109 saveToCache =
kTRUE;
1125 while (( fi = dynamic_cast<TFileInfo *>(nxtLoc()) )) {
1141 Info(
"GetDataSet",
"Filling dataset locality information: " 1142 "it might take time, be patient!");
1149 Error(
"GetDataSet",
"Can't create file stager");
1158 Error(
"GetDataSet",
"Endpoint lookup returned an error");
1166 Info(
"GetDataSet",
"Lookup successful for %d file(s)", rv);
1181 Warning(
"GetDataSet",
"Could not cache retrieved information");
1187 Info(
"GetDataSet",
"Dataset information currently cached follows");
1188 newFc->
Print(
"filter:SsCc");
1196 while ((fi = dynamic_cast<TFileInfo *>(itCache.Next()))) {
1208 for (
Int_t i=0; i<nDeleteUrls; i++) {
1217 for (
Int_t i=0; i<nDeleteUrls; i++) {
1245 if (fstg)
delete fstg;
1258 return existsNonEmpty;
static Bool_t ParseOfficialDataUri(TString &uri, Bool_t sim, TString &period, Int_t &year, std::vector< Int_t > *&runList, Bool_t &esd, Int_t &aodNum, TString &pass)
virtual void SetRegexp(const char *regexp)
virtual Int_t ClearCache(const char *)
Clear cached information matching uri.
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
virtual Bool_t ExistsDataSet(const char *uri)
Checks if the indicated dataset exists.
const char * GetDefaultTreeName() const
Returns the tree set with SetDefaultTreeName, if set. Otherwise returns the name of the first tree in the meta da...
virtual void SetArchSubst(Bool_t archSubst)
virtual const char * GetKey(UInt_t, const char *) const
void Final()
MD5 finalization, ends an MD5 message-digest operation, writing the message digest and zeroizing ...
Bool_t EqualTo(const char *cs, ECaseCompare cmp=kExact) const
Collectable string class.
const TUrl * kfNoopNoneUrl
This class represents a WWW compatible URL.
static TUrl * AliEnWhereIs(TUrl *alienUrl, TString &closeSE, Bool_t onlyFromCloseSE)
TString & ReplaceAll(const TString &s1, const TString &s2)
Bool_t RemoveUrl(const char *url)
Remove an URL. Returns kTRUE if successful, kFALSE otherwise.
TUrl * GetCurrentUrl() const
Return the current url.
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
virtual void InvalidateGridResult()
virtual TMap * GetDataSets(const char *, UInt_t)
Returns all datasets for the <group> and <user> specified by <uri>.
virtual Int_t GetEntries() const
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
const char * AsString() const
Return message digest as string.
virtual TGridResult * GetGridResult(Bool_t forceNewQuery=kFALSE)
Query the AliEn file catalog.
Long_t GetModTime(const char *uri)
Gets last dataset modification time.
const char * GetOptions() const
Bool_t AddUrl(const char *url, Bool_t infront=kFALSE)
Add a new URL.
TString & Prepend(const char *cs)
virtual Bool_t Cd(const char *="", Bool_t=kFALSE)
void SetBit(UInt_t f, Bool_t set)
Set or unset the user status bits as specified in f.
const char * GetUrl(Bool_t withDeflt=kFALSE) const
Return full URL.
virtual TFileCollection * GetCollection(Bool_t forceNewQuery=kFALSE)
const char * GetFile() const
virtual TGridResult * Query(const char *, const char *, const char *="", const char *="")
static struct mg_connection * fc(struct mg_context *ctx)
Int_t Update(Long64_t avgsize=-1)
Update accumulated information about the elements of the collection (e.g.
static TGrid * Connect(const char *grid, const char *uid=0, const char *pw=0, const char *options=0)
The grid should be of the form: <grid>://<host>[:<port>], e.g.
This code implements the MD5 message-digest algorithm.
TString & Append(const char *cs)
void MayNotUse(const char *method) const
Use this method to signal that a method (defined in a base class) may not be called in a derived clas...
virtual Int_t ShowCache(const char *)
Show cached information matching uri.
const char * GetAnchor() const
TUrl * FindByUrl(const char *url, Bool_t withDeflt=kFALSE)
Find an element from a URL. Returns 0 if not found.
const TUrl * kfNoopUnknownUrl
virtual TTime Now()
Get current time in milliseconds since 0:00 Jan 1 1995.
const TUrl * kfNoopRedirUrl
Bool_t RemoveUrlAt(Int_t i)
Remove URL at given position. Returns kTRUE on success, kFALSE on error.
void Update(const UChar_t *buf, UInt_t len)
Update TMD5 object to reflect the concatenation of another buffer full of bytes.
R__EXTERN TSystem * gSystem
if object ctor succeeded but object should not be used
Int_t Add(TFileInfo *info)
Add TFileInfo to the collection.
virtual void SetFileName(const char *fileName)
Long64_t GetNFiles() const
void Print(Option_t *option="") const
Prints the contents of the TFileCollection.
Bool_t ParseUri(const char *uri, TString *dsGroup=0, TString *dsUser=0, TString *dsName=0, TString *dsTree=0, Bool_t onlyCurrent=kFALSE, Bool_t wildcards=kFALSE)
Parses a (relative) URI that describes a DataSet on the cluster.
Int_t WriteDataSet(const char *group, const char *user, const char *dsName, TFileCollection *dataset, UInt_t option=0, TMD5 *checksum=0)
Writes indicated dataset.
static std::vector< Int_t > * ExpandRunSpec(TString &runSpec)
virtual Int_t LocateCollection(TFileCollection *fc, Bool_t addDummyUrl=kFALSE)
Massive location of files.
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
virtual ~TAliEnFind()
Destructor.
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
TGridResult * fGridResult
static Bool_t ParseCustomFindUri(TString &uri, TString &basePath, TString &fileName, TString &anchor, TString &query, TString &treeName, TString &regexp)
virtual Int_t RegisterDataSet(const char *, TFileCollection *, const char *)
Register a dataset, performing quota checks if needed.
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
virtual void Init(TString cacheDir, TString urlTpl, ULong_t cacheExpire_s)
virtual void SetBasePath(const char *basePath)
virtual ~TDataSetManagerAliEn()
const char * GetGrid() const
void SetAnchor(const char *anchor)
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
TAliEnFind & operator=(const TAliEnFind &rhs)
Assignment operator. Cached query result is not copied.
void SetDefaultTreeName(const char *treeName)
virtual TFileCollection * GetDataSet(const char *uri, const char *=0)
Utility function used in various methods for user dataset upload.
virtual void Print(Option_t *opt="") const
This method must be overridden when a class wants to print itself.
virtual Int_t ScanDataSet(const char *, UInt_t)
Scans the dataset indicated by <uri> and returns the number of missing files.
TMap implements an associative array of (key,value) pairs using a THashTable for efficient retrieval ...
static TFileStager * Open(const char *stager)
Open a stager, after having loaded the relevant plug-in.
TAliEnFind(const TString &basePath="", const TString &fileName="", const TString &anchor="", const Bool_t archSubst=kFALSE, const TString &treeName="", const TString &regexp="", const TString &query="")
Constructor.
virtual TList * GetFindCommandsFromUri(TString &uri, EDataMode &dataMode, Bool_t &forceUpdate)
Parse kind.
virtual const char * GetSearchId()
Int_t Match(const TString &s, UInt_t start=0)
Runs a match on s against the regex 'this' was created with.
Mother of all ROOT objects.
virtual void SetTreeName(const char *fileName)
virtual void Add(TObject *obj)
Wrapper for PCRE library (Perl Compatible Regular Expressions).
Class that contains a list of TFileInfo's and accumulated meta data information about its entries...
TFileCollection * GetDataSet(const char *uri, const char *srv=0)
Utility function used in various methods for user dataset upload.
virtual void InvalidateSearchId()
void SetOptions(const char *opt)
virtual void ShowDataSets(const char *="*", const char *="")
Prints formatted information about the dataset 'uri'.
TObject * GetValue(const char *keyname) const
Returns a pointer to the value associated with keyname as name of the key.
Int_t Atoi() const
Return integer value of string.
virtual void SetAnchor(const char *anchor)
virtual TGridResult * Ls(const char *="", Option_t *="", Bool_t=kFALSE)
Class describing a generic file including meta information.
virtual Bool_t RemoveDataSet(const char *uri)
Removes the indicated dataset.
virtual const char * GetName() const
Returns name of object.
Int_t Substitute(TString &s, const TString &r, Bool_t doDollarSubst=kTRUE)
Substitute matching part of s with r, dollar back-ref substitution is performed if doDollarSubst is t...
void SetString(const char *s)
virtual TGridResult * Command(const char *, Bool_t=kFALSE, UInt_t=2)
TDataSetManagerFile * fCache
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
const char * Data() const