ROOT logo
ROOT » PROOF » PROOF » TDataSetManager

class TDataSetManager: public TObject


TDataSetManager

This class contains functions to handle datasets in PROOF
It is the layer between TProofServ and the file system that stores
the datasets.


Function Members (Methods)

public:
TDataSetManager(const char* group = 0, const char* user = 0, const char* options = 0)
virtual~TDataSetManager()
voidTObject::AbstractMethod(const char* method) const
virtual voidTObject::AppendPad(Option_t* option = "")
virtual voidTObject::Browse(TBrowser* b)
static Bool_tCheckDataSetSrvMaps(TUrl* furl, TString& fn, TList* srvmaplist = 0)
static TClass*Class()
virtual const char*TObject::ClassName() const
virtual voidTObject::Clear(Option_t* = "")
virtual Int_tClearCache(const char* uri)
virtual TObject*TObject::Clone(const char* newname = "") const
virtual Int_tTObject::Compare(const TObject* obj) const
virtual voidTObject::Copy(TObject& object) const
static TStringCreateUri(const char* dsGroup = 0, const char* dsUser = 0, const char* dsName = 0, const char* dsTree = 0)
virtual voidTObject::Delete(Option_t* option = "")MENU
virtual Int_tTObject::DistancetoPrimitive(Int_t px, Int_t py)
virtual voidTObject::Draw(Option_t* option = "")
virtual voidTObject::DrawClass() constMENU
virtual TObject*TObject::DrawClone(Option_t* option = "") constMENU
virtual voidTObject::Dump() constMENU
virtual voidTObject::Error(const char* method, const char* msgfmt) const
virtual voidTObject::Execute(const char* method, const char* params, Int_t* error = 0)
virtual voidTObject::Execute(TMethod* method, TObjArray* params, Int_t* error = 0)
virtual voidTObject::ExecuteEvent(Int_t event, Int_t px, Int_t py)
virtual Bool_tExistsDataSet(const char* uri)
virtual voidTObject::Fatal(const char* method, const char* msgfmt) const
static Int_tFillMetaData(TFileInfo* fi, TDirectory* d, const char* rdir = "/")
virtual TObject*TObject::FindObject(const char* name) const
virtual TObject*TObject::FindObject(const TObject* obj) const
virtual Long64_tGetAvgFileSize() const
virtual TFileCollection*GetDataSet(const char* uri, const char* server = 0)
virtual TMap*GetDataSets(const char* uri, UInt_t = TDataSetManager::kExport)
static TList*GetDataSetSrvMaps()
virtual Option_t*TObject::GetDrawOption() const
static Long_tTObject::GetDtorOnly()
virtual Long64_tGetGroupQuota(const char* group)
virtual TMap*GetGroupQuotaMap()
virtual Long64_tGetGroupUsed(const char* group)
virtual const char*TObject::GetIconName() const
virtual const char*TObject::GetName() const
virtual char*TObject::GetObjectInfo(Int_t px, Int_t py) const
static Bool_tTObject::GetObjectStat()
virtual Option_t*TObject::GetOption() const
virtual TMap*GetSubDataSets(const char* uri, const char* excludeservers)
virtual const char*TObject::GetTitle() const
virtual UInt_tTObject::GetUniqueID() const
virtual Bool_tTObject::HandleTimer(TTimer* timer)
virtual ULong_tTObject::Hash() const
virtual voidTObject::Info(const char* method, const char* msgfmt) const
virtual Bool_tTObject::InheritsFrom(const char* classname) const
virtual Bool_tTObject::InheritsFrom(const TClass* cl) const
virtual voidTObject::Inspect() constMENU
voidTObject::InvertBit(UInt_t f)
virtual TClass*IsA() const
virtual Bool_tTObject::IsEqual(const TObject* obj) const
virtual Bool_tTObject::IsFolder() const
Bool_tTObject::IsOnHeap() const
virtual Bool_tTObject::IsSortable() const
Bool_tTObject::IsZombie() const
virtual voidTObject::ls(Option_t* option = "") const
voidTObject::MayNotUse(const char* method) const
virtual voidMonitorUsedSpace(TVirtualMonitoringWriter* monitoring)
virtual Bool_tTObject::Notify()
virtual Int_tNotifyUpdate(const char* group = 0, const char* user = 0, const char* dspath = 0, Long_t mtime = 0, const char* checksum = 0)
voidTObject::Obsolete(const char* method, const char* asOfVers, const char* removedFromVers) const
static voidTObject::operator delete(void* ptr)
static voidTObject::operator delete(void* ptr, void* vp)
static voidTObject::operator delete[](void* ptr)
static voidTObject::operator delete[](void* ptr, void* vp)
void*TObject::operator new(size_t sz)
void*TObject::operator new(size_t sz, void* vp)
void*TObject::operator new[](size_t sz)
void*TObject::operator new[](size_t sz, void* vp)
virtual voidTObject::Paint(Option_t* option = "")
static TList*ParseDataSetSrvMaps(const TString& srvmaps)
virtual voidParseInitOpts(const char* opts)
Bool_tParseUri(const char* uri, TString* dsGroup = 0, TString* dsUser = 0, TString* dsName = 0, TString* dsTree = 0, Bool_t onlyCurrent = kFALSE, Bool_t wildcards = kFALSE)
virtual voidTObject::Pop()
virtual voidTObject::Print(Option_t* option = "") const
virtual Int_tTObject::Read(const char* name)
virtual voidTObject::RecursiveRemove(TObject* obj)
virtual Int_tRegisterDataSet(const char* uri, TFileCollection* dataSet, const char* opt)
virtual Bool_tRemoveDataSet(const char* uri)
voidTObject::ResetBit(UInt_t f)
virtual voidTObject::SaveAs(const char* filename = "", Option_t* option = "") constMENU
virtual voidTObject::SavePrimitive(ostream& out, Option_t* option = "")
Int_tScanDataSet(const char* uri, const char* opt)
virtual Int_tScanDataSet(const char* uri, UInt_t option = kReopen|kDebug)
static Int_tScanDataSet(TFileCollection* dataset, Int_t fopt, Int_t sopt = 0, Int_t ropt = 0, Bool_t dbg = kFALSE, Int_t* touched = 0, Int_t* opened = 0, Int_t* disappeared = 0, TList* flist = 0, Long64_t avgsz = -1, const char* mss = 0, Int_t maxfiles = -1, const char* stageopts = 0)
static Int_tScanFile(TFileInfo* fileinfo, Bool_t notify)
voidTObject::SetBit(UInt_t f)
voidTObject::SetBit(UInt_t f, Bool_t set)
virtual voidTObject::SetDrawOption(Option_t* option = "")MENU
static voidTObject::SetDtorOnly(void* obj)
static voidTObject::SetObjectStat(Bool_t stat)
voidSetScanCounters(Int_t t = -1, Int_t o = -1, Int_t d = -1)
virtual voidTObject::SetUniqueID(UInt_t uid)
virtual Int_tShowCache(const char* uri)
virtual voidShowDataSets(const char* uri = "*", const char* opt = "")
virtual voidShowMembers(TMemberInspector& insp)
virtual voidShowQuota(const char* opt)
virtual voidStreamer(TBuffer& b)
voidStreamerNVirtual(TBuffer& b)
virtual voidTObject::SysError(const char* method, const char* msgfmt) const
Bool_tTObject::TestBit(UInt_t f) const
Int_tTObject::TestBits(UInt_t f) const
virtual voidTObject::UseCurrentStyle()
virtual voidTObject::Warning(const char* method, const char* msgfmt) const
virtual Int_tTObject::Write(const char* name = 0, Int_t option = 0, Int_t bufsize = 0)
virtual Int_tTObject::Write(const char* name = 0, Int_t option = 0, Int_t bufsize = 0) const
protected:
virtual voidTObject::DoError(int level, const char* location, const char* fmt, va_list va) const
virtual TMap*GetGroupUsedMap()
Int_tGetNDisapparedFiles() const
Int_tGetNOpenedFiles() const
Int_tGetNTouchedFiles() const
voidGetQuota(const char* group, const char* user, const char* dsName, TFileCollection* dataset)
virtual TMap*GetUserUsedMap()
voidTObject::MakeZombie()
voidPrintDataSet(TFileCollection* fc, Int_t popt = 0)
voidPrintUsedSpace()
Bool_tReadGroupConfig(const char* cf = 0)
static Long64_tToBytes(const char* size = 0)
virtual voidUpdateUsedSpace()

Data Members

public:
enum EDataSetStatusBits { kCheckQuota
kAllowRegister
kAllowVerify
kTrustInfo
kIsSandbox
kUseCache
kDoNotUseCache
};
enum EDataSetWorkOpts { kDebug
kShowDefault
kPrint
kExport
kQuotaUpdate
kSetDefaultTree
kForceScan
kNoHeaderPrint
kReopen
kTouch
kMaxFiles
kReadShort
kFileMustExist
kNoAction
kLocateOnly
kStageOnly
kNoCacheUpdate
kRefreshLs
kList
kAllFiles
kStagedFiles
kNoStagedCheck
};
enum TObject::EStatusBits { kCanDelete
kMustCleanup
kObjInCanvas
kIsReferenced
kHasUUID
kCannotPick
kNoContextMenu
kInvalidObject
};
enum TObject::[unnamed] { kIsOnHeap
kNotDeleted
kZombie
kBitMask
kSingleKey
kOverwrite
kWriteDelete
};
protected:
Long64_tfAvgFileSizeAverage file size to be used to estimate the dataset size (in MB)
TUrifBaseBase URI used to parse dataset names
TStringfCommonGroupGroup that stores the COMMON datasets
TStringfCommonUserUser that stores the COMMON datasets
TStringfGroupGroup to which the owner of this session belongs
TStringfGroupConfigFilePath to the group config file
TMapfGroupQuotaGroup quotas (read from config file)
TMapfGroupUsed<group> --> <used bytes> (TParameter)
Long_tfMTimeGroupConfigLast modification of the group config file
Int_tfNDisappearedFilesNumber of files disappeared in the last ScanDataSet operation
Int_tfNOpenedFilesNumber of files opened in the last ScanDataSet operation
Int_tfNTouchedFilesNumber of files touched in the last ScanDataSet operation
TStringfUserOwner of the session
TMapfUserUsed<group> --> <map of users> --> <value>
static TStringfgCommonDataSetTagName for common datasets, default: COMMON
static TList*fgDataSetSrvMapsList of TPair(TRegexp, TObjString) for mapping server coordinates

Class Charts

Inheritance Inherited Members Includes Libraries
Class Charts

Function documentation

TDataSetManager(const char* group = 0, const char* user = 0, const char* options = 0)
 Main constructor
~TDataSetManager()
 Destructor
void ParseInitOpts(const char* opts)
 Parse the opts string and set the init bits accordingly
 Available options:
    Cq:               set kCheckQuota
    Ar:               set kAllowRegister
    Av:               set kAllowVerify
    Ti:               set kTrustInfo
    Sb:               set kIsSandbox
    Ca:               set kUseCache or kDoNotUseCache
 The opts string may also contain additional unrelated info: in such a case
 the field delimited by the prefix "opt:" is analyzed, e.g. if opts is
 "/tmp/dataset  opt:Cq:-Ar: root://lxb6046.cern.ch" only the substring
 "Cq:-Ar:" will be parsed.
Bool_t ReadGroupConfig(const char* cf = 0)
 Read group config file 'cf'.
 If cf == 0, re-read the file pointed to by fGroupConfigFile if it has changed.

 expects the following directives:
 Group definition:
   group <groupname> <user>+
 disk quota
   property <groupname> diskquota <quota in GB>
 average filesize (to be used when the file size is not available)
   averagefilesize <average size>{G,g,M,m,K,k}
Long64_t ToBytes(const char* size = 0)
 Static utility function to get the number of bytes from a string
 representation in the form "<digit><sfx>" with <sfx> = {"", "k", "M", "G",
 "T", "P"} (case insensitive).
 Returns -1 if the format is wrong.
TFileCollection * GetDataSet(const char* uri, const char* server = 0)
 Utility function used in various methods for user dataset upload.
Bool_t RemoveDataSet(const char* uri)
 Removes the indicated dataset
Bool_t ExistsDataSet(const char* uri)
 Checks if the indicated dataset exists
TMap * GetDataSets(const char* uri, UInt_t = TDataSetManager::kExport)
 Returns all datasets for the <group> and <user> specified by <uri>.
 If <user> is 0, it returns all datasets for the given <group>.
 If <group> is 0, it returns all datasets.
 The returned TMap contains:
    <group> --> <map of users> --> <map of datasets> --> <dataset> (TFileCollection)

 The unsigned int 'option' is forwarded to GetDataSet and BrowseDataSet.
 Available options (to be .or.ed):
    kShowDefault    a default selection is shown that include the ones from
                    the current user, the ones from the group and the common ones
    kPrint          print the dataset content
    kQuotaUpdate    update quotas
    kExport         use export naming

 NB1: options "kPrint", "kQuotaUpdate" and "kExport" are mutually exclusive
 NB2: for options "kPrint" and "kQuotaUpdate" the return value is null.
Int_t ScanDataSet(const char* uri, const char* opt)
 Scans the dataset indicated by 'uri' following the 'opts' directives

 The 'opts' string contains up to 4 directive fields separated by ':'

  'selection' field :
    A, allfiles:    process all files
    D, staged:      process only staged (on Disk) files (if 'allfiles:' is not specified
                    the default is to process only files marked as non-staged)
  'pre-action field':
    O, open:        open the files marked as staged when processing only files
                    marked as non-staged
    T, touch:       open and touch the files marked as staged when processing
                    only files marked as non-staged
    I, nostagedcheck: do not check the actual stage status on selected files

  'process' field:
    N, noaction:    do nothing on the selected files
    P, fullproc:    open the selected files and extract the meta information
    L, locateonly:  only locate the selected files
    S, stageonly:   issue a stage request for the selected files not yet staged

  'auxiliary' field
    V, verbose:     notify the actions

 Returns 0 on success, -1 if any failure occurs.
Int_t ScanDataSet(const char* uri, UInt_t option = kReopen|kDebug)
 Scans the dataset indicated by <uri> and returns the number of missing files.
 Returns -1 if any failure occurs.
 For more details, see documentation of
 ScanDataSet(TFileCollection *dataset, const char *option)
void GetQuota(const char* group, const char* user, const char* dsName, TFileCollection* dataset)
 Gets quota information from this dataset
void ShowQuota(const char* opt)
 Display quota information
void PrintUsedSpace()
 Prints the quota
void MonitorUsedSpace(TVirtualMonitoringWriter* monitoring)
 Log info to the monitoring server
Long64_t GetGroupUsed(const char* group)
 Returns the used space of that group
Long64_t GetGroupQuota(const char* group)
 returns the quota a group is allowed to have
void UpdateUsedSpace()
 updates the used space maps
Int_t RegisterDataSet(const char* uri, TFileCollection* dataSet, const char* opt)
 Register a dataset, performing quota checks if needed.
 Returns 0 on success, -1 on failure
Int_t NotifyUpdate(const char* group = 0, const char* user = 0, const char* dspath = 0, Long_t mtime = 0, const char* checksum = 0)
 Save into the <datasetdir>/dataset.list file the name of the last updated
 or created or modified dataset
 Returns 0 on success, -1 on error
Int_t ClearCache(const char* uri)
 Clear cached information matching uri
Int_t ShowCache(const char* uri)
 Show cached information matching uri
TString CreateUri(const char* dsGroup = 0, const char* dsUser = 0, const char* dsName = 0, const char* dsTree = 0)
 Creates URI for the dataset manager in the form '[[/dsGroup/]dsUser/]dsName[#dsObjPath]'.
 The optional dsObjPath can be in the form '[subdir/]objname'.
Bool_t ParseUri(const char* uri, TString* dsGroup = 0, TString* dsUser = 0, TString* dsName = 0, TString* dsTree = 0, Bool_t onlyCurrent = kFALSE, Bool_t wildcards = kFALSE)
 Parses a (relative) URI that describes a DataSet on the cluster.
 The input 'uri' should be in the form '[[/group/]user/]dsname[#[subdir/]objname]',
  where 'objname' is the name of the object (e.g. the tree name) and the 'subdir'
 is the directory in the file where it should be looked for.
 After resolving against a base URI consisting of proof://masterhost/group/user
 - meaning masterhost, group and user of the current session -
 the path is checked to contain exactly three elements separated by '/':
 group/user/dsname
 If wildcards, '*' is allowed in group and user and dsname is allowed to be empty.
 If onlyCurrent, only group and user of current session are allowed.
 Only non-null parameters are filled by this function.
 Returns kTRUE in case of success.
TMap * GetSubDataSets(const char* uri, const char* excludeservers)
 Partition dataset 'ds' according to the servers.
 The returned TMap contains:
                <server> --> <subdataset> (TFileCollection)
 where <subdataset> is the subset of 'ds' on <server>
 The partitioning is done using all the URLs in the TFileInfo's, so the
 resulting datasets are not mutually exclusive.
 The string 'excludeservers' contains a comma-separated list of servers to exclude
 from the map.
void PrintDataSet(TFileCollection* fc, Int_t popt = 0)
 Formatted printout of the content of TFileCollection 'fc'.
 Options in the form
           popt = u * 10 + f
     f    0 => header only, 1 => header + files
   when printing files
     u    0 => print file name only, 1 => print full URL
void ShowDataSets(const char* uri = "*", const char* opt = "")
 Prints formatted information about the dataset 'uri'.
 The type and format of output is driven by 'opt':

   1. opt = "server:srv1[,srv2[,srv3[,...]]]"
            Print info about the subsets of 'uri' on servers srv1, srv2, ...
   2. opt = "servers[:exclude:srv1[,srv2[,srv3[,...]]]]"
            Print info about the subsets of 'uri' on all servers, except
            the ones in the exclude list srv1, srv2, ...
   3. opt = <any>
            Print info about all datasets matching 'uri'

   If 'opt' contains 'full:' the list of files in the datasets are also printed.
   In case 3. this is enabled only if 'uri' matches a single dataset.

   In case 3, if 'opt' contains
      'full:'      the list of files in the datasets are also printed.
       'forcescan:' the datasets are opened to get the information; otherwise the
                   pre-processed information is used.
       'noheader:'  the labelling header is not printed; useful when chaining
                    several printouts
      'noupdate:'  do not update the cache (which may be slow on very remote
                   servers)
      'refresh:'   refresh the information (requires appropriate credentials;
                   typically it can be done only for owned datasets)
Int_t ScanDataSet(TFileCollection* dataset, Int_t fopt, Int_t sopt = 0, Int_t ropt = 0, Bool_t dbg = kFALSE, Int_t* touched = 0, Int_t* opened = 0, Int_t* disappeared = 0, TList* flist = 0, Long64_t avgsz = -1, const char* mss = 0, Int_t maxfiles = -1, const char* stageopts = 0)
 Go through the files in the specified dataset, selecting files according to
 'fopt' and doing on these files the actions described by 'sopt'.
 If required, the information in 'dataset' is updated.

 The int fopt controls which files have to be processed (or added to the list
 if ropt is 1 - see below); 'fopt' is defined in terms of csopt and fsopt:
                    fopt = sign(fsopt) * csopt * 100 + fsopt
 where 'fsopt' controls the actual selection
    -1              all files in the dataset
     0              process only files marked as 'non-staged'
   >=1              as 0 but files that are marked 'staged' are opened
   >=2              as 1 but files that are marked 'staged' are touched
    10              process only files marked as 'staged'; files marked as 'non-staged'
                    are ignored
 and 'csopt' controls if an actual check on the staged status (via TFileStager) is done
     0              check that the file is staged using TFileStager
     1              do not hard check the staged status
 (example: use fopt = -101 to check the staged status of all the files, or fopt = 110
  to re-check the stage status of all the files marked as staged)

 If 'dbg' is true, some information about the ongoing operations is regularly
 printed; this can be useful when processing very large datasets, an operation
 which can take a very long time.

 The int 'sopt' controls what is done on the selected files (this is effective only
 if ropt is 0 or 2 - see below):
    -1              no action (fopt = 2 and sopt = -1 touches all staged files)
     0              do the full process: open the files and fill the meta-information
                    in the TFileInfo object, including the end-point URL
     1              only locate the files, by updating the end-point URL (uses TFileStager::Locate
                    which is much faster than a TFile::Open)
     2              issue a stage request on the files

 The int 'ropt' controls which actions are performed:
     0              do the full process: get list of files to process and process them
     1              get the list of files to be scanned and return it in flist
     2              process the files in flist (according to sopt)
 When defined, flist is under the responsibility of the caller.

 If avgsz > 0 it is used for the final update of the dataset global counters.

 If 'mss' is defined use it to initialize the stager (instead of the Url in the
 TFileInfo objects)

 If maxfiles > 0, select for processing a maximum of 'maxfiles' files (but if fopt is 1 or 2
 all files marked as 'staged' are still opened or touched)

 Return code
     1 dataset was not changed
     2 dataset was changed

 The number of touched, opened and disappeared files are returned in the respective
 variables, if these are defined.
Int_t ScanFile(TFileInfo* fileinfo, Bool_t notify)
 Open the file described by 'fileinfo' to extract the relevant meta-information.
 Return 0 if OK, -2 if the file cannot be opened, -1 if it is corrupted
Int_t FillMetaData(TFileInfo* fi, TDirectory* d, const char* rdir = "/")
 Navigate the directory 'd' (and its subdirectories) looking for TTree objects.
 Fill in the relevant metadata information in 'fi'. The name of the TFileInfoMeta
 metadata entry will be "/dir1/dir2/.../tree_name".
 Return 0 on success, -1 if any problem happens (object found in keys cannot be read,
 for example)
TList * ParseDataSetSrvMaps(const TString& srvmaps)
 Create a server mapping list from the content of 'srvmaps'
 Return the list (owned by the caller) or 0 if no valid info could be found.
TList * GetDataSetSrvMaps()
 Static getter for server mapping list
Bool_t CheckDataSetSrvMaps(TUrl* furl, TString& fn, TList* srvmaplist = 0)
 Check if the dataset server mappings apply to the url defined by 'furl'.
 Use srvmaplist if defined, else use the default list.
 If yes, resolve the mapping into file1 and return kTRUE.
 Otherwise return kFALSE.
void SetScanCounters(Int_t t = -1, Int_t o = -1, Int_t d = -1)
 Update scan counters
TDataSetManager(const TDataSetManager& )
TDataSetManager& operator=(const TDataSetManager& )
TMap * GetGroupUsedMap()
 for dataset files (init from DataSet.SrvMap)
{ return &fGroupUsed; }
TMap * GetUserUsedMap()
{ return &fUserUsed; }
Int_t GetNTouchedFiles() const
{ return fNTouchedFiles; }
Int_t GetNOpenedFiles() const
{ return fNOpenedFiles; }
Int_t GetNDisapparedFiles() const
{ return fNDisappearedFiles; }
Long64_t GetAvgFileSize() const
{ return fAvgFileSize; }
TMap * GetGroupQuotaMap()
{ return &fGroupQuota; }