Logo ROOT   6.16/01
Reference Guide
THDFSFile.cxx
Go to the documentation of this file.
1// @(#)root/hdfs:$Id$
2// Author: Brian Bockelman 29/09/2009
3
4/*************************************************************************
5 * Copyright (C) 1995-2002, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/**
13\class THDFSFile
14\ingroup IO
15
16Reads and writes its data via the HDFS protocols
17
18A THDFSFile is like a normal TFile except that it reads and writes
19its data via the HDFS protocols. For more information on HDFS, see
20http://hadoop.apache.org/hdfs/.
21This implementation interfaces with libhdfs, which is a JNI-based
22library (i.e., it will start a Java JVM internally the first time
23it is called). At a minimum, you will need your environment's
24$CLASSPATH variable set up properly to use. Here's an example of
25one way to properly set your classpath, assuming you use the OSG
26distribution of Hadoop:
27 $ source $HADOOP_CONF_DIR/hadoop-env.sh
28 $ export CLASSPATH=$HADOOP_CLASSPATH
29Additionally, you will need a valid libjvm in your $LD_LIBRARY_PATH.
30This is usually found in either:
31 $JAVA_HOME/jre/lib/i386/server
32or
33 $JAVA_HOME/jre/lib/amd64/server
34This file can only be used if hdfs support is compiled into ROOT.
35The HDFS URLs follow the Hadoop notation and should be of the form:
36 hdfs://[host:port]/absolute/path/to/file/in/HDFS.root
37Any host or port information will be ignored; this is taken from the
38node's HDFS configuration files.
39
40Example HDFS URLs:
41
42 hdfs:///user/username/dir1/file2.root
43 hdfs://localhost/user/username/dir1/file2.root
44*/
45
46#include "syslog.h"
47#include "assert.h"
48#include "stdlib.h"
49
50#include "THDFSFile.h"
51#include "TError.h"
52#include "TSystem.h"
53#include "TROOT.h"
54
55#include "hdfs.h"
56//#include "hdfsJniHelper.h"
57
58// For now, we don't allow any write/fs modification operations.
60
61static const char hdfs_default_host[] = "default";
62static const int hdfs_default_port = 0;
63
64// The following snippet is used for developer-level debugging
65// Contributed by Pete Wyckoff of the HDFS project
66#define THDFSFile_TRACE
67#ifndef THDFSFile_TRACE
68#define TRACE(x) \
69 Debug("THDFSFile", "%s", x);
70#else
71#define TRACE(x);
72#endif
73
75
76////////////////////////////////////////////////////////////////////////////////
77/// Usual Constructor. See the TFile constructor for details.
78
79THDFSFile::THDFSFile(const char *path, Option_t *option,
80 const char *ftitle, Int_t compress):
81 TFile(path, "WEB", ftitle, compress)
82{
83 fHdfsFH = 0;
84 fFS = 0;
85 fSize = -1;
86 fSysOffset = 0;
87
88 fOption = option;
90 Bool_t create = (fOption == "CREATE") ? kTRUE : kFALSE;
91 Bool_t recreate = (fOption == "RECREATE") ? kTRUE : kFALSE;
92 Bool_t update = (fOption == "UPDATE") ? kTRUE : kFALSE;
93 Bool_t read = (fOption == "READ") ? kTRUE : kFALSE;
94 if (!create && !recreate && !update && !read) {
95 read = kTRUE;
96 fOption = "READ";
97 }
98
99 Bool_t has_authn = kTRUE;
100
101 struct hdfsBuilder *bld = hdfsNewBuilder();
102 if (!bld) {
103 SysError("THDFSFile", "Error creating hdfs builder");
104 goto zombie;
105 }
106
107 hdfsBuilderSetNameNode(bld, hdfs_default_host);
108 hdfsBuilderSetNameNodePort(bld, hdfs_default_port);
109 if (has_authn) {
110 UserGroup_t *ugi = gSystem->GetUserInfo((char *)0);
111 const char *user = (ugi->fUser).Data();
112 hdfsBuilderSetUserName(bld, user);
113 delete ugi;
114 }
115
116 fFS = hdfsBuilderConnect(bld);
117
118 if (fFS == 0) {
119 SysError("THDFSFile", "HDFS client for %s cannot open the filesystem",
120 path);
121 goto zombie;
122 }
123
124 if (create || update || recreate) {
125 Int_t mode = O_RDWR | O_CREAT;
126 if (recreate) mode |= O_TRUNC;
127
128#ifndef WIN32
129 fD = SysOpen(path, mode, 0644);
130#else
131 fD = SysOpen(path, mode | O_BINARY, S_IREAD | S_IWRITE);
132#endif
133 if (fD == -1) {
134 SysError("THDFSFile", "file %s can not be opened", path);
135 goto zombie;
136 }
138 } else {
139#ifndef WIN32
140 fD = SysOpen(path, O_RDONLY, 0644);
141#else
142 fD = SysOpen(path, O_RDONLY | O_BINARY, S_IREAD | S_IWRITE);
143#endif
144 if (fD == -1) {
145 SysError("THDFSFile", "file %s can not be opened for reading", path);
146 goto zombie;
147 }
149 }
150
151 Init(create || recreate);
152
153 return;
154
155zombie:
156 // Error in opening file; make this a zombie
157 MakeZombie();
159}
160
161////////////////////////////////////////////////////////////////////////////////
162/// Close and clean-up HDFS file.
163
165{
166 TRACE("destroy")
167
168 // We assume that the file is closed in SysClose
169 // Explicitly release reference to HDFS filesystem object.
170 // Turned off now due to compilation issues.
171 // The very awkward way of releasing HDFS FS objects (by accessing JNI
172 // internals) is going away in the next libhdfs version.
173}
174
175////////////////////////////////////////////////////////////////////////////////
176/// Read specified number of bytes from current offset into the buffer.
177/// See documentation for TFile::SysRead().
178
180{
181 TRACE("READ")
182 tSize num_read_total = 0;
183
184 do {
185 tSize num_read = hdfsRead((hdfsFS)fFS, (hdfsFile)fHdfsFH, (char *)buf + num_read_total, len - num_read_total);
186 num_read_total += num_read;
187 if (num_read < 0) {
188 gSystem->SetErrorStr(strerror(errno));
189 break;
190 } else if (num_read == 0) {
191 break;
192 }
193 } while (num_read_total < len);
194
195 fSysOffset += num_read_total;
196 return num_read_total;
197}
198
199////////////////////////////////////////////////////////////////////////////////
200/// Seek to a specified position in the file. See TFile::SysSeek().
201/// Note that THDFSFile does not support seeks when the file is open for write.
202
204{
205 TRACE("SEEK")
206 if (whence == SEEK_SET)
207 fSysOffset = offset;
208 else if (whence == SEEK_CUR)
209 fSysOffset += offset;
210 else if (whence == SEEK_END) {
211 if (offset > 0) {
212 SysError("THDFSFile", "Unable to seek past end of file");
213 return -1;
214 }
215 if (fSize == -1) {
216 hdfsFileInfo *info = hdfsGetPathInfo((hdfsFS)fFS, fPath);
217 if (info != 0) {
218 fSize = info->mSize;
219 free(info);
220 } else {
221 SysError("THDFSFile", "Unable to seek to end of file");
222 return -1;
223 }
224 }
226 } else {
227 SysError("THDFSFile", "Unknown whence!");
228 return -1;
229 }
230
231 if (hdfsSeek((hdfsFS)fFS, (hdfsFile)fHdfsFH, fSysOffset) != 0) {
232 SysError("THDFSFile", "Unable to seek to the given position");
233 return -1;
234 }
235
236 return fSysOffset;
237}
238
239////////////////////////////////////////////////////////////////////////////////
240/// Open a file in HDFS.
241
242Int_t THDFSFile::SysOpen(const char * pathname, Int_t flags, UInt_t)
243{
244 // This is given to us as a URL in Hadoop notation (hdfs://hadoop-name:9000/user/foo/bar or
245 // hdfs:///user/foo/bar); convert this to a file name.
246 fUrl = TUrl(pathname);
247
249 if (!fPath.BeginsWith("/")) {
250 fPath.Insert(0, '/');
251 }
252
253 if ((fHdfsFH = hdfsOpenFile((hdfsFS)fFS, fPath, flags, 0, 0, 0)) == 0) {
254 SysError("THDFSFile", "Unable to open file %s in HDFS", pathname);
255 return -1;
256 }
257 return 1;
258}
259
260////////////////////////////////////////////////////////////////////////////////
261/// Close the file in HDFS.
262
264{
265 int result = hdfsCloseFile((hdfsFS)fFS, (hdfsFile)fHdfsFH);
266 fFS = 0;
267 fHdfsFH = 0;
268 return result;
269}
270
271////////////////////////////////////////////////////////////////////////////////
272/// Write a buffer into the file; this is not supported currently.
273
275{
276 errno = ENOSYS;
277 return -1;
278}
279
280////////////////////////////////////////////////////////////////////////////////
281/// Perform a stat on the HDFS file; see TFile::SysStat().
282
284{
285 *id = ::Hash(fPath);
286
287 hdfsFileInfo *info = hdfsGetPathInfo((hdfsFS)fFS, fPath);
288 if (info != 0) {
289 fSize = info->mSize;
290 *size = fSize;
291 if (info->mKind == kObjectKindFile)
292 *flags = 0;
293 else if (info->mKind == kObjectKindDirectory)
294 *flags = 1;
295 *modtime = info->mLastMod;
296 free(info);
297 } else {
298 return 1;
299 }
300
301 return 0;
302}
303
304////////////////////////////////////////////////////////////////////////////////
305/// Sync remaining data to disk; Not supported by HDFS.
306
308{
309 errno = ENOSYS;
310 return -1;
311}
312
313////////////////////////////////////////////////////////////////////////////////
314/// ResetErrno; simply calls TSystem::ResetErrno().
315
317{
319}
320
321
322/**
323\class THDFSSystem
324\ingroup IO
325
326Directory handler for HDFS (THDFSFile).
327*/
328
329
331
332////////////////////////////////////////////////////////////////////////////////
333
334THDFSSystem::THDFSSystem() : TSystem("-hdfs", "HDFS Helper System")
335{
336 SetName("hdfs");
337
338 Bool_t has_authn = kTRUE;
339
340 struct hdfsBuilder *bld = hdfsNewBuilder();
341 if (!bld) {
342 SysError("THDFSSystem", "Error creating hdfs builder");
343 goto zombie;
344 }
345
346 hdfsBuilderSetNameNode(bld, hdfs_default_host);
347 hdfsBuilderSetNameNodePort(bld, hdfs_default_port);
348 if (has_authn) {
349 UserGroup_t *ugi = gSystem->GetUserInfo((char *)0);
350 const char *user = (ugi->fUser).Data();
351 hdfsBuilderSetUserName(bld, user);
352 delete ugi;
353 }
354
355 fFH = hdfsBuilderConnect(bld);
356
357 if (fFH == 0) {
358 SysError("THDFSSystem", "HDFS client cannot open the filesystem");
359 goto zombie;
360 }
361
362 fDirp = 0;
363
364 return;
365
366zombie:
367 // Error in opening file; make this a zombie
368 MakeZombie();
370
371}
372
373////////////////////////////////////////////////////////////////////////////////
374/// Make a directory.
375
377{
378 if (fFH != 0) {
379 Error("MakeDirectory", "No filesystem handle (should never happen)");
380 return -1;
381 }
382 TUrl url(path);
383
385 return hdfsCreateDirectory((hdfsFS)fFH, url.GetFileAndOptions());
386 } else {
387 return -1;
388 }
389
390}
391
392////////////////////////////////////////////////////////////////////////////////
393/// Open a directory via hdfs. Returns an opaque pointer to a dir
394/// structure. Returns 0 in case of error.
395
396void *THDFSSystem::OpenDirectory(const char * path)
397{
398 if (fFH == 0) {
399 Error("OpenDirectory", "No filesystem handle (should never happen)");
400 return 0;
401 }
402 TUrl url(path);
403 fDirp = 0;
404/*
405 if (fDirp) {
406 Error("OpenDirectory", "invalid directory pointer (should never happen)");
407 fDirp = 0;
408 }
409*/
410
411 hdfsFileInfo * dir = 0;
412 if ((dir = hdfsGetPathInfo((hdfsFS)fFH, url.GetFileAndOptions())) == 0) {
413 return 0;
414 }
415 if (dir->mKind != kObjectKindDirectory) {
416 return 0;
417 }
418
419 fDirp = (void *)hdfsListDirectory((hdfsFS)fFH, url.GetFileAndOptions(), &fDirEntries);
420 fDirCtr = 0;
421
422 fUrlp = new TUrl[fDirEntries];
423
424 return fDirp;
425}
426
427////////////////////////////////////////////////////////////////////////////////
428
430{
431 if (fFH == 0) {
432 Error("FreeDirectory", "No filesystem handle (should never happen)");
433 return;
434 }
435 if (dirp != fDirp) {
436 Error("FreeDirectory", "invalid directory pointer (should never happen)");
437 return;
438 }
439 if (fUrlp != 0) {
440 delete[] fUrlp;
441 }
442
443 hdfsFreeFileInfo((hdfsFileInfo *)fDirp, fDirEntries);
444 fDirp=0;
445}
446
447////////////////////////////////////////////////////////////////////////////////
448
449const char *THDFSSystem::GetDirEntry(void *dirp)
450{
451 if (fFH == 0) {
452 Error("GetDirEntry", "No filesystem handle (should never happen)");
453 return 0;
454 }
455 if (dirp != fDirp) {
456 Error("GetDirEntry", "invalid directory pointer (should never happen)");
457 return 0;
458 }
459 if (dirp == 0) {
460 Error("GetDirEntry", "Passed an invalid directory pointer.");
461 return 0;
462 }
463
464 if (fDirCtr == fDirEntries-1) {
465 return 0;
466 }
467
468 hdfsFileInfo *fileInfo = ((hdfsFileInfo *)dirp) + fDirCtr;
469 fUrlp[fDirCtr].SetUrl(fileInfo->mName);
470 const char * result = fUrlp[fDirCtr].GetFile();
471 TUrl tempUrl;
472 tempUrl.SetUrl("hdfs:///");
473 tempUrl.SetFile(result);
474 fUrlp[fDirCtr].SetUrl(tempUrl.GetUrl());
475 result = fUrlp[fDirCtr].GetUrl();
476 fDirCtr++;
477
478 return result;
479}
480
481////////////////////////////////////////////////////////////////////////////////
482/// Get info about a file. Info is returned in the form of a FileStat_t
483/// structure (see TSystem.h).
484/// The function returns 0 in case of success and 1 if the file could
485/// not be stat'ed.
486
488{
489 if (fFH == 0) {
490 Error("GetPathInfo", "No filesystem handle (should never happen)");
491 return 1;
492 }
493
494 TUrl url(path);
495
496 hdfsFileInfo *fileInfo = hdfsGetPathInfo((hdfsFS)fFH, url.GetFileAndOptions());
497
498 if (fileInfo == 0)
499 return 1;
500
501 buf.fDev = 0;
502 buf.fIno = 0;
503 buf.fMode = fileInfo->mPermissions;
504 buf.fUid = gSystem->GetUid(fileInfo->mOwner);
505 buf.fGid = gSystem->GetGid(fileInfo->mGroup);
506 buf.fSize = fileInfo->mSize;
507 buf.fMtime = fileInfo->mLastAccess;
508 buf.fIsLink = kFALSE;
509
510 return 0;
511}
512
513////////////////////////////////////////////////////////////////////////////////
514/// Returns FALSE if one can access a file using the specified access mode.
515/// Mode is the same as for the Unix access(2) function.
516/// Attention, bizarre convention of return value!!
517
519{
520 if (mode & kExecutePermission || mode & kWritePermission)
521 return kTRUE;
522
523 if (fFH == 0) {
524 Error("AccessPathName", "No filesystem handle (should never happen)");
525 return kTRUE;
526 }
527
528 TUrl url(path);
529
530 if (hdfsExists((hdfsFS)fFH, url.GetFileAndOptions()) == 0)
531 return kFALSE;
532 else
533 return kTRUE;
534}
535
536////////////////////////////////////////////////////////////////////////////////
537/// Unlink, i.e. remove, a file or directory. Returns 0 when successful,
538/// -1 in case of failure.
539
540Int_t THDFSSystem::Unlink(const char * path)
541{
542 if (fFH == 0) {
543 Error("Unlink", "No filesystem handle (should never happen)");
544 return kTRUE;
545 }
546
548 return hdfsDelete((hdfsFS)fFH, path, 1);
549 } else {
550 return -1;
551 }
552}
static void update(gsl_integration_workspace *workspace, double a1, double b1, double area1, double error1, double a2, double b2, double area2, double error2)
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
long Long_t
Definition: RtypesCore.h:50
bool Bool_t
Definition: RtypesCore.h:59
long long Long64_t
Definition: RtypesCore.h:69
const Bool_t kTRUE
Definition: RtypesCore.h:87
const char Option_t
Definition: RtypesCore.h:62
#define ClassImp(name)
Definition: Rtypes.h:363
#define gDirectory
Definition: TDirectory.h:213
static const int hdfs_default_port
Definition: THDFSFile.cxx:62
#define TRACE(x)
Definition: THDFSFile.cxx:71
static const char hdfs_default_host[]
Definition: THDFSFile.cxx:61
static const Bool_t R__HDFS_ALLOW_CHANGES
Definition: THDFSFile.cxx:59
#define gROOT
Definition: TROOT.h:410
EAccessMode
Definition: TSystem.h:44
@ kExecutePermission
Definition: TSystem.h:46
@ kWritePermission
Definition: TSystem.h:47
R__EXTERN TSystem * gSystem
Definition: TSystem.h:540
#define free
Definition: civetweb.c:1539
#define O_BINARY
Definition: civetweb.c:799
Bool_t fWritable
True if directory is writable.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:48
virtual void Init(Bool_t create)
Initialize a TFile object.
Definition: TFile.cxx:592
TString fOption
File options.
Definition: TFile.h:86
Int_t fD
File descriptor.
Definition: TFile.h:77
Reads and writes its data via the HDFS protocols.
Definition: THDFSFile.h:18
TString fPath
HDFS path.
Definition: THDFSFile.h:26
THDFSFile(const char *path, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose)
Usual Constructor. See the TFile constructor for details.
Definition: THDFSFile.cxx:79
Int_t SysWrite(Int_t fd, const void *buf, Int_t len)
Write a buffer into the file; this is not supported currently.
Definition: THDFSFile.cxx:274
Int_t SysClose(Int_t fd)
Close the file in HDFS.
Definition: THDFSFile.cxx:263
Int_t SysOpen(const char *pathname, Int_t flags, UInt_t mode)
Open a file in HDFS.
Definition: THDFSFile.cxx:242
Int_t SysStat(Int_t fd, Long_t *id, Long64_t *size, Long_t *flags, Long_t *modtime)
Perform a stat on the HDFS file; see TFile::SysStat().
Definition: THDFSFile.cxx:283
void ResetErrno() const
ResetErrno; simply calls TSystem::ResetErrno().
Definition: THDFSFile.cxx:316
TUrl fUrl
HDFS url.
Definition: THDFSFile.h:25
void * fFS
HDFS user handle.
Definition: THDFSFile.h:22
Int_t SysSync(Int_t fd)
Sync remaining data to disk; Not supported by HDFS.
Definition: THDFSFile.cxx:307
Long64_t fSize
File size.
Definition: THDFSFile.h:23
Long64_t fSysOffset
Seek offset in file.
Definition: THDFSFile.h:24
Long64_t SysSeek(Int_t fd, Long64_t offset, Int_t whence)
Seek to a specified position in the file.
Definition: THDFSFile.cxx:203
Int_t SysRead(Int_t fd, void *buf, Int_t len)
Read specified number of bytes from current offset into the buffer.
Definition: THDFSFile.cxx:179
virtual ~THDFSFile()
Close and clean-up HDFS file.
Definition: THDFSFile.cxx:164
void * fHdfsFH
HDFS file handle.
Definition: THDFSFile.h:21
Directory handler for HDFS (THDFSFile).
Definition: THDFSFile.h:48
void * OpenDirectory(const char *name)
Open a directory via hdfs.
Definition: THDFSFile.cxx:396
void * fFH
HDFS filesystem handle.
Definition: THDFSFile.h:51
void FreeDirectory(void *dirp)
Free a directory.
Definition: THDFSFile.cxx:429
Bool_t AccessPathName(const char *path, EAccessMode mode)
Returns FALSE if one can access a file using the specified access mode.
Definition: THDFSFile.cxx:518
TUrl * fUrlp
Pointer to the array of directory content URLs.
Definition: THDFSFile.h:53
Int_t fDirEntries
The number of entries in the fDirp array.
Definition: THDFSFile.h:54
Int_t MakeDirectory(const char *name)
Make a directory.
Definition: THDFSFile.cxx:376
const char * GetDirEntry(void *dirp)
Get a directory entry. Returns 0 if no more entries.
Definition: THDFSFile.cxx:449
Int_t fDirCtr
The current position in the fDirp array.
Definition: THDFSFile.h:55
void * fDirp
Pointer to the array of file information.
Definition: THDFSFile.h:52
Int_t GetPathInfo(const char *path, FileStat_t &buf)
Get info about a file.
Definition: THDFSFile.cxx:487
Int_t Unlink(const char *path)
Unlink, i.e. remove, a file or directory.
Definition: THDFSFile.cxx:540
virtual ULong_t Hash() const
Return hash value for this object.
Definition: TNamed.h:49
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
virtual void SysError(const char *method, const char *msgfmt,...) const
Issue system error message.
Definition: TObject.cxx:894
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:880
void MakeZombie()
Definition: TObject.h:49
TString & Insert(Ssiz_t pos, const char *s)
Definition: TString.h:644
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1113
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:610
Abstract base class defining a generic interface to the underlying Operating System.
Definition: TSystem.h:248
virtual Int_t GetGid(const char *group=0)
Returns the group's id. If group = 0, returns current user's group.
Definition: TSystem.cxx:1568
static void ResetErrno()
Static function resetting system error number.
Definition: TSystem.cxx:285
void SetErrorStr(const char *errstr)
Set the system error string.
Definition: TSystem.cxx:250
virtual Int_t GetUid(const char *user=0)
Returns the user's id. If user = 0, returns current user's id.
Definition: TSystem.cxx:1549
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition: TSystem.cxx:1588
This class represents a WWW compatible URL.
Definition: TUrl.h:35
const char * GetUrl(Bool_t withDeflt=kFALSE) const
Return full URL.
Definition: TUrl.cxx:385
const char * GetFileAndOptions() const
Return the file and its options (the string specified behind the ?).
Definition: TUrl.cxx:499
const char * GetFile() const
Definition: TUrl.h:72
void SetUrl(const char *url, Bool_t defaultIsFile=kFALSE)
Parse url character string and split in its different subcomponents.
Definition: TUrl.cxx:108
void SetFile(const char *file)
Definition: TUrl.h:88
Int_t fMode
Definition: TSystem.h:128
Long64_t fSize
Definition: TSystem.h:131
Long_t fDev
Definition: TSystem.h:126
Int_t fGid
Definition: TSystem.h:130
Long_t fMtime
Definition: TSystem.h:132
Long_t fIno
Definition: TSystem.h:127
Bool_t fIsLink
Definition: TSystem.h:133
Int_t fUid
Definition: TSystem.h:129
TString fUser
Definition: TSystem.h:142