Logo ROOT   6.18/05
Reference Guide
TProofSuperMaster.cxx
Go to the documentation of this file.
1// @(#)root/proof:$Id$
2// Author: Fons Rademakers 13/02/97
3
4/*************************************************************************
5 * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TProofSuperMaster
13\ingroup proofkernel
14
15Implementation of TProof controlling PROOF federated clusters.
16
17*/
18
19#include "TProofSuperMaster.h"
20#include "TString.h"
21#include "TObjString.h"
22#include "TError.h"
23#include "TList.h"
24#include "TSortedList.h"
25#include "TSlave.h"
26#include "TMap.h"
27#include "TProofServ.h"
28#include "TSocket.h"
29#include "TMonitor.h"
30#include "TDSet.h"
31#include "TPluginManager.h"
32#include "TVirtualProofPlayer.h"
33#include "TMessage.h"
34#include "TUrl.h"
36#include "TProofNodeInfo.h"
37#include "TROOT.h"
38
40
41////////////////////////////////////////////////////////////////////////////////
42/// Start super master PROOF session.
43
44TProofSuperMaster::TProofSuperMaster(const char *masterurl, const char *conffile,
45 const char *confdir, Int_t loglevel,
46 const char *alias, TProofMgr *mgr)
47{
48 // Default initializations
50
51 // This may be needed during init
52 fManager = mgr;
53
54 fUrl = TUrl(masterurl);
55
56 if (!conffile || !conffile[0])
57 conffile = kPROOF_ConfFile;
58 else if (!strncasecmp(conffile, "sm:", 3))
59 conffile+=3;
60 if (!confdir || !confdir[0])
61 confdir = kPROOF_ConfDir;
62
63 // Instance type
68
69 Init(masterurl, conffile, confdir, loglevel, alias);
70
71 // For Final cleanup
72 gROOT->GetListOfProofs()->Add(this);
73}
74
75////////////////////////////////////////////////////////////////////////////////
76/// Start up PROOF submasters.
77
79{
80 // If this is a supermaster server, find the config file and start
81 // submaster servers as specified in the config file.
82 // There is a difference in startup between a slave and a submaster
83 // in which the submaster will issue a kPROOF_LOGFILE and
84 // then a kPROOF_LOGDONE message (which must be collected)
85 // while slaves do not.
86
87 Int_t pc = 0;
88 TList *submasterList = new TList;
89 // Get list of workers
90 if (gProofServ->GetWorkers(submasterList, pc) == TProofServ::kQueryStop) {
91 Error("StartSlaves", "getting list of submaster nodes");
92 return kFALSE;
93 }
95 if (fImage.IsNull())
96 fImage = Form("%s:%s", TUrl(gSystem->HostName()).GetHostFQDN(),
98
99 UInt_t nSubmasters = submasterList->GetSize();
100 UInt_t nSubmastersDone = 0;
101 Int_t ord = 0;
102 TList validSubmasters;
103 TList validPairs;
104 validPairs.SetOwner();
105
106 // Loop over all submasters and start them
107 TListIter next(submasterList);
108 TObject *to;
109 TProofNodeInfo *submaster;
110 while ((to = next())) {
111 // Get the next submaster from the list
112 submaster = (TProofNodeInfo *)to;
113 const Char_t *conffile = submaster->GetConfig();
114 const Char_t *image = submaster->GetImage();
115 const Char_t *msd = submaster->GetMsd();
116 Int_t sport = submaster->GetPort();
117 if (sport == -1)
118 sport = fUrl.GetPort();
119
120 TString fullord = TString(gProofServ->GetOrdinal()) + "." + ((Long_t) ord);
121
122 // create submaster server
123 TUrl u(Form("%s:%d", submaster->GetNodeName().Data(), sport));
124 // Add group info in the password firdl, if any
125 if (strlen(gProofServ->GetGroup()) > 0) {
126 // Set also the user, otherwise the password is not exported
127 if (strlen(u.GetUser()) <= 0)
130 }
131 TSlave *slave =
132 CreateSubmaster(u.GetUrl(), fullord, image, msd);
133
134 // Add to global list (we will add to the monitor list after
135 // finalizing the server startup)
136 Bool_t submasterOk = kTRUE;
137 fSlaves->Add(slave);
138 if (slave->IsValid()) {
139 validPairs.Add(new TPair(slave, new TObjString(conffile)));
140 } else {
141 submasterOk = kFALSE;
142 fBadSlaves->Add(slave);
143 }
144
145 PDB(kGlobal,3)
146 Info("StartSlaves","submaster on host %s created and"
147 " added to list", submaster->GetNodeName().Data());
148
149 // Notify opening of connection
150 nSubmastersDone++;
152 m << TString("Opening connections to submasters") << nSubmasters
153 << nSubmastersDone << submasterOk;
155
156 ord++;
157
158 } // end loop over all submasters
159
160 // Cleanup
161 SafeDelete(submasterList);
162
163 nSubmastersDone = 0;
164
165 // Here we finalize the server startup: in this way the bulk
166 // of remote operations are almost parallelized
167 TIter nxsc(&validPairs);
168 TPair *sc = 0;
169 while ((sc = (TPair *) nxsc())) {
170 // Finalize setup of the server
171 TSlave *sl = (TSlave *) sc->Key();
172 TObjString *cf = (TObjString *) sc->Value();
174
175 // Monitor good slaves
176 Bool_t submasterOk = kTRUE;
177 if (sl->IsValid()) {
178 // check protocol compatability
179 // protocol 1 is not supported anymore
180 if (fProtocol == 1) {
181 Error("StartSlaves", "master and submaster protocols"
182 " not compatible (%d and %d)",
184 submasterOk = kFALSE;
185 fBadSlaves->Add(sl);
186 } else {
187 fAllMonitor->Add(sl->GetSocket());
188 validSubmasters.Add(sl);
189 }
190 } else {
191 submasterOk = kFALSE;
192 fBadSlaves->Add(sl);
193 }
194
195 // Notify end of startup operations
196 nSubmastersDone++;
198 m << TString("Setting up submasters") << nSubmasters
199 << nSubmastersDone << submasterOk;
201 }
202
203 Collect(kAll); //Get kPROOF_LOGFILE and kPROOF_LOGDONE messages
204 TIter nextSubmaster(&validSubmasters);
205 while (TSlave* sl = dynamic_cast<TSlave*>(nextSubmaster())) {
206 if (sl->GetStatus() == -99) {
207 Error("StartSlaves", "not allowed to connect to PROOF master server");
208 fBadSlaves->Add(sl);
209 continue;
210 }
211
212 if (!sl->IsValid()) {
213 Error("StartSlaves", "failed to setup connection with PROOF master server");
214 fBadSlaves->Add(sl);
215 continue;
216 }
217 }
218
219 return kTRUE;
220}
221
222////////////////////////////////////////////////////////////////////////////////
223/// Process a data set (TDSet) using the specified selector (.C) file.
224/// Entry- or event-lists should be set in the data set object using
225/// TDSet::SetEntryList.
226/// The return value is -1 in case of error and TSelector::GetStatus() in
227/// in case of success.
228
229Long64_t TProofSuperMaster::Process(TDSet *set, const char *selector, Option_t *option,
231{
232 if (!IsValid()) return -1;
233
235
236 if (GetProgressDialog())
237 GetProgressDialog()->ExecPlugin(5, this, selector, set->GetListOfElements()->GetSize(),
238 first, nentries);
239
240 return GetPlayer()->Process(set, selector, option, nentries, first);
241}
242
243////////////////////////////////////////////////////////////////////////////////
244/// Validate a TDSet.
245
247{
248 if (dset->ElementsValid()) return;
249
250 // We need to recheck after this
253
254 TList msds;
255 msds.SetOwner();
256
257 TList smholder;
258 smholder.SetOwner();
259 TList elemholder;
260 elemholder.SetOwner();
261
262 // build nodelist with slaves and elements
263 TIter nextSubmaster(GetListOfActiveSlaves());
264 while (TSlave *sl = dynamic_cast<TSlave*>(nextSubmaster())) {
265 TList *smlist = 0;
266 TPair *p = dynamic_cast<TPair*>(msds.FindObject(sl->GetMsd()));
267 if (!p) {
268 smlist = new TList;
269 smlist->SetName(sl->GetMsd());
270
271 smholder.Add(smlist);
272 TList *elemlist = new TSortedList(kSortDescending);
273 elemlist->SetName(TString(sl->GetMsd())+"_elem");
274 elemholder.Add(elemlist);
275 msds.Add(new TPair(smlist, elemlist));
276 } else {
277 smlist = dynamic_cast<TList*>(p->Key());
278 }
279 if (smlist) smlist->Add(sl);
280 }
281
282 TIter nextElem(dset->GetListOfElements());
283 while (TDSetElement *elem = dynamic_cast<TDSetElement*>(nextElem())) {
284 if (elem->GetValid()) continue;
285 TPair *p = dynamic_cast<TPair*>(msds.FindObject(elem->GetMsd()));
286 if (p && p->Value()) {
287 TList *xl = dynamic_cast<TList*>(p->Value());
288 if (xl) xl->Add(elem);
289 } else {
290 Error("ValidateDSet", "no mass storage domain '%s' associated"
291 " with available submasters",
292 elem->GetMsd());
293 return;
294 }
295 }
296
297 // send to slaves
298 TList usedsms;
299 TIter nextSM(&msds);
300 SetDSet(dset); // set dset to be validated in Collect()
301 while (TPair *msd = dynamic_cast<TPair*>(nextSM())) {
302 TList *sms = dynamic_cast<TList*>(msd->Key());
303 TList *setelements = dynamic_cast<TList*>(msd->Value());
304
305 // distribute elements over the slaves
306 Int_t nsms = sms ? sms->GetSize() : -1;
307 Int_t nelements = setelements ? setelements->GetSize() : -1;
308 for (Int_t i=0; i<nsms; i++) {
309
310 TDSet set(dset->GetType(), dset->GetObjName(),
311 dset->GetDirectory());
312 for (Int_t j = (i*nelements)/nsms;
313 j < ((i+1)*nelements)/nsms;
314 j++) {
315 TDSetElement *elem = setelements ?
316 dynamic_cast<TDSetElement*>(setelements->At(j)) : (TDSetElement *)0;
317 if (elem) {
318 set.Add(elem->GetFileName(), elem->GetObjName(),
319 elem->GetDirectory(), elem->GetFirst(),
320 elem->GetNum(), elem->GetMsd());
321 }
322 }
323
324 if (set.GetListOfElements()->GetSize()>0) {
326 mesg << &set;
327
328 TSlave *sl = dynamic_cast<TSlave*>(sms->At(i));
329 if (sl) {
330 PDB(kGlobal,1)
331 Info("ValidateDSet",
332 "Sending TDSet with %d elements to worker %s"
333 " to be validated", set.GetListOfElements()->GetSize(),
334 sl->GetOrdinal());
335 sl->GetSocket()->Send(mesg);
336 usedsms.Add(sl);
337 } else {
338 Warning("ValidateDSet", "not a TSlave object");
339 }
340 }
341 }
342 }
343
344 PDB(kGlobal,1)
345 Info("ValidateDSet","Calling Collect");
346 Collect(&usedsms);
347 SetDSet(0);
348}
349
350////////////////////////////////////////////////////////////////////////////////
351/// Construct a TProofPlayer object. The player string specifies which
352/// player should be created: remote, slave, sm (supermaster) or base.
353/// Default is sm. Socket is needed in case a slave player is created.
354
356{
357 if (!player)
358 player = "sm";
359
361 return GetPlayer();
362}
363
@ kPROOF_VALIDATE_DSET
Definition: MessageTypes.h:70
@ kPROOF_SERVERSTARTED
Definition: MessageTypes.h:82
#define SafeDelete(p)
Definition: RConfig.hxx:543
int Int_t
Definition: RtypesCore.h:41
char Char_t
Definition: RtypesCore.h:29
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
long Long_t
Definition: RtypesCore.h:50
bool Bool_t
Definition: RtypesCore.h:59
long long Long64_t
Definition: RtypesCore.h:69
const Bool_t kTRUE
Definition: RtypesCore.h:87
const char Option_t
Definition: RtypesCore.h:62
#define ClassImp(name)
Definition: Rtypes.h:365
#define R__ASSERT(e)
Definition: TError.h:96
int nentries
Definition: THbookFile.cxx:89
const Bool_t kSortDescending
Definition: TList.h:38
#define PDB(mask, level)
Definition: TProofDebug.h:56
R__EXTERN TProofServ * gProofServ
Definition: TProofServ.h:347
const char *const kPROOF_ConfFile
Definition: TProof.h:122
const char *const kPROOF_ConfDir
Definition: TProof.h:123
const Int_t kPROOF_Protocol
Definition: TProof.h:120
#define gROOT
Definition: TROOT.h:414
char * Form(const char *fmt,...)
R__EXTERN TSystem * gSystem
Definition: TSystem.h:560
void SetName(const char *name)
Definition: TCollection.h:204
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Definition: TCollection.h:182
Manages an element of a TDSet.
Definition: TDSet.h:66
const char * GetObjName() const
Definition: TDSet.h:120
Long64_t GetNum() const
Definition: TDSet.h:114
const char * GetDirectory() const
Return directory where to look for object.
Definition: TDSet.cxx:253
const char * GetMsd() const
Definition: TDSet.h:117
const char * GetFileName() const
Definition: TDSet.h:111
Long64_t GetFirst() const
Definition: TDSet.h:112
This class implements a data set to be used for PROOF processing.
Definition: TDSet.h:153
virtual Bool_t Add(const char *file, const char *objname=0, const char *dir=0, Long64_t first=0, Long64_t num=-1, const char *msd=0)
Add file to list of files to be analyzed.
Definition: TDSet.cxx:1052
Bool_t ElementsValid()
Check if all elements are valid.
Definition: TDSet.cxx:1556
const char * GetType() const
Definition: TDSet.h:228
TList * GetListOfElements() const
Definition: TDSet.h:231
const char * GetDirectory() const
Definition: TDSet.h:230
const char * GetObjName() const
Definition: TDSet.h:229
@ kSomeInvalid
Definition: TDSet.h:161
@ kValidityChecked
Definition: TDSet.h:160
Iterator of linked list.
Definition: TList.h:200
A doubly linked list.
Definition: TList.h:44
virtual void Add(TObject *obj)
Definition: TList.h:87
virtual TObject * FindObject(const char *name) const
Find an object in this list using its name.
Definition: TList.cxx:575
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:354
virtual void Add(TSocket *sock, Int_t interest=kRead)
Add socket to the monitor's active list.
Definition: TMonitor.cxx:168
Collectable string class.
Definition: TObjString.h:28
const char * GetName() const
Returns name of object.
Definition: TObjString.h:38
Mother of all ROOT objects.
Definition: TObject.h:37
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:866
void SetBit(UInt_t f, Bool_t set)
Set or unset the user status bits as specified in f.
Definition: TObject.cxx:694
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:880
void ResetBit(UInt_t f)
Definition: TObject.h:171
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition: TObject.cxx:854
Class used by TMap to store (key,value) pairs.
Definition: TMap.h:102
TObject * Value() const
Definition: TMap.h:121
TObject * Key() const
Definition: TMap.h:120
Long_t ExecPlugin(int nargs, const T &... params)
The PROOF manager interacts with the PROOF server coordinator to create or destroy a PROOF session,...
Definition: TProofMgr.h:43
The purpose of this class is to provide a complete node description for masters, submasters and worke...
const TString & GetMsd() const
const TString & GetImage() const
const TString & GetConfig() const
const TString & GetNodeName() const
Int_t GetPort() const
const char * GetOrdinal() const
Definition: TProofServ.h:253
const char * GetImage() const
Definition: TProofServ.h:244
virtual EQueryAction GetWorkers(TList *workers, Int_t &prioritychange, Bool_t resume=kFALSE)
Get list of workers to be used from now on.
const char * GetUser() const
Definition: TProofServ.h:241
TSocket * GetSocket() const
Definition: TProofServ.h:257
const char * GetGroup() const
Definition: TProofServ.h:242
const char * GetWorkDir() const
Definition: TProofServ.h:243
Implementation of TProof controlling PROOF federated clusters.
TProofSuperMaster(const char *masterurl, const char *conffile=kPROOF_ConfFile, const char *confdir=kPROOF_ConfDir, Int_t loglevel=0, const char *alias=0, TProofMgr *mgr=0)
Start super master PROOF session.
Bool_t StartSlaves(Bool_t)
Start up PROOF submasters.
virtual TVirtualProofPlayer * MakePlayer(const char *player=0, TSocket *s=0)
Construct a TProofPlayer object.
void ValidateDSet(TDSet *dset)
Validate a TDSet.
Long64_t Process(TDSet *set, const char *selector, Option_t *option="", Long64_t nentries=-1, Long64_t firstentry=0)
Process a data set (TDSet) using the specified selector (.C) file.
TProofMgr * fManager
Definition: TProof.h:587
TVirtualProofPlayer * GetPlayer() const
Definition: TProof.h:716
TUrl fUrl
Definition: TProof.h:567
TPluginHandler * GetProgressDialog() const
Definition: TProof.h:742
Bool_t IsValid() const
Definition: TProof.h:937
Int_t Collect(const TSlave *sl, Long_t timeout=-1, Int_t endtype=-1, Bool_t deactonfail=kFALSE)
Collect responses from slave sl.
Definition: TProof.cxx:2651
void SetPlayer(TVirtualProofPlayer *player)
Set a new PROOF player.
Definition: TProof.cxx:10175
void InitMembers()
Default initializations.
Definition: TProof.cxx:524
TSlave * CreateSubmaster(const char *url, const char *ord, const char *image, const char *msd, Int_t nwk=1)
Create a new TSlave of type TSlave::kMaster.
Definition: TProof.cxx:1857
Bool_t fMasterServ
Definition: TProof.h:566
TList * fBadSlaves
Definition: TProof.h:574
@ kAll
Definition: TProof.h:564
Int_t Init(const char *masterurl, const char *conffile, const char *confdir, Int_t loglevel, const char *alias=0)
Start the PROOF environment.
Definition: TProof.cxx:749
TList * GetListOfActiveSlaves() const
Definition: TProof.h:723
@ kIsClient
Definition: TProof.h:344
@ kIsMaster
Definition: TProof.h:345
@ kIsTopMaster
Definition: TProof.h:346
Int_t fProtocol
Definition: TProof.h:571
void SetDSet(TDSet *dset)
Definition: TProof.h:737
TList * fSlaves
Definition: TProof.h:572
TMonitor * fAllMonitor
Definition: TProof.h:575
TString fImage
Definition: TProof.h:570
Class describing a PROOF worker server.
Definition: TSlave.h:46
@ kMaster
Definition: TSlave.h:55
TSocket * GetSocket() const
Definition: TSlave.h:134
virtual Int_t SetupServ(Int_t stype, const char *conffile)
Init a PROOF slave object.
Definition: TSlave.cxx:179
virtual Bool_t IsValid() const
Definition: TSlave.h:150
const char * GetOrdinal() const
Definition: TSlave.h:131
virtual Int_t Send(const TMessage &mess)
Send a TMessage object.
Definition: TSocket.cxx:521
A sorted doubly linked list.
Definition: TSortedList.h:28
Basic string class.
Definition: TString.h:131
const char * Data() const
Definition: TString.h:364
Bool_t IsNull() const
Definition: TString.h:402
virtual const char * HostName()
Return the system's host name.
Definition: TSystem.cxx:312
This class represents a WWW compatible URL.
Definition: TUrl.h:35
const char * GetUrl(Bool_t withDeflt=kFALSE) const
Return full URL.
Definition: TUrl.cxx:385
void SetUser(const char *user)
Definition: TUrl.h:85
const char * GetUser() const
Definition: TUrl.h:68
const char * GetHostFQDN() const
Return fully qualified domain name of url host.
Definition: TUrl.cxx:467
Int_t GetPort() const
Definition: TUrl.h:81
void SetPasswd(const char *pw)
Definition: TUrl.h:86
Abstract interface for the PROOF player.
static TVirtualProofPlayer * Create(const char *player, TProof *p, TSocket *s=0)
Create a PROOF player.
virtual Long64_t Process(TDSet *set, const char *selector, Option_t *option="", Long64_t nentries=-1, Long64_t firstentry=0)=0
static constexpr double s
static constexpr double pc
Definition: first.py:1
auto * m
Definition: textangle.C:8