Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RRawFile.hxx
Go to the documentation of this file.
1// @(#)root/io:$Id$
2// Author: Jakob Blomer
3
4/*************************************************************************
5 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#ifndef ROOT_RRawFile
13#define ROOT_RRawFile
14
15#include <string_view>
16
17#include <cstddef>
18#include <cstdint>
19#include <memory>
20#include <string>
21
22namespace ROOT {
23namespace Internal {
24
25/**
26 * \class RRawFile RRawFile.hxx
27 * \ingroup IO
28 *
29 * The RRawFile provides read-only access to local and remote files. Data can be read either byte-wise or line-wise.
30 * The RRawFile base class provides line-wise access and buffering for byte-wise access. Derived classes provide the
31 * low-level read operations, e.g. from a local file system or from a web server. The RRawFile is used for non-ROOT
32 * RDataSource implementations and for RNTuple.
33 *
34 * Files are addressed by a URL consisting of a transport protocol part and a location, like file:///path/to/data.
35 * If the transport protocol part and the :// separator are missing, the protocol defaults to local file access. Files are
36 * opened when required (on reading, getting file size) and closed on object destruction.
37 *
38 * RRawFiles manage system resources and are therefore made non-copyable. They can be explicitly cloned though.
39 *
40 * RRawFile objects are conditionally thread safe. See the user manual for further details:
41 * https://root.cern/manual/thread_safety/
42 */
43class RRawFile {
44public:
45 /// Derived classes do not necessarily need to provide file size information but they can return "not known" instead
46 static constexpr std::uint64_t kUnknownFileSize = std::uint64_t(-1);
47 /// kAuto detects the line break from the first line, kSystem picks the system's default
49
50 // Combination of flags provided by derived classes about the nature of the file
51 /// GetSize() does not return kUnknownFileSize
52 static constexpr int kFeatureHasSize = 0x01;
53 /// Map() and Unmap() are implemented
54 static constexpr int kFeatureHasMmap = 0x02;
55 /// File supports async IO
56 static constexpr int kFeatureHasAsyncIo = 0x04;
57
58 /// On construction, an ROptions parameter can customize the RRawFile behavior
59 struct ROptions {
61 /**
62 * Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering. A negative value indicates
63 * that the protocol-dependent default block size should be used.
64 */
67 };
68
69 /// Used for vector reads from multiple offsets into multiple buffers. This is unlike readv(), which scatters a
70 /// single byte range from disk into multiple buffers.
71 struct RIOVec {
72 /// The destination for reading
73 void *fBuffer = nullptr;
74 /// The file offset
75 std::uint64_t fOffset = 0;
76 /// The number of desired bytes
77 std::size_t fSize = 0;
78 /// The number of actually read bytes, set by ReadV()
79 std::size_t fOutBytes = 0;
80 };
81
82 /// Implementations may enforce limits on the use of vector reads. These limits can depend on the server or
83 /// the specific file opened and can be queried per RRawFile object through GetReadVLimits().
84 /// Note that due to such limits, a vector read with a single request can behave differently from a Read() call.
85 struct RIOVecLimits {
86 /// Maximum number of elements in a ReadV request vector
87 std::size_t fMaxReqs = static_cast<std::size_t>(-1);
88 /// Maximum size in bytes of any single request in the request vector
89 std::size_t fMaxSingleSize = static_cast<std::size_t>(-1);
90 /// Maximum size in bytes of the sum of requests in the vector
91 std::uint64_t fMaxTotalSize = static_cast<std::uint64_t>(-1);
92
93 bool HasReqsLimit() const { return fMaxReqs != static_cast<std::size_t>(-1); }
94 bool HasSizeLimit() const
95 {
96 return fMaxSingleSize != static_cast<std::size_t>(-1) || fMaxTotalSize != static_cast<std::uint64_t>(-1);
97 }
98 };
99
100private:
101 /// Don't change without adapting ReadAt()
102 static constexpr unsigned int kNumBlockBuffers = 2;
104 /// Where in the open file does fBuffer start
105 std::uint64_t fBufferOffset;
106 /// The number of currently buffered bytes in fBuffer
108 /// Points into the I/O buffer with data from the file, not owned.
109 unsigned char *fBuffer;
110
112 RBlockBuffer(const RBlockBuffer &) = delete;
114 ~RBlockBuffer() = default;
115
116 /// Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copied.
117 size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset);
118 };
119 /// To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers
120 unsigned int fBlockBufferIdx;
121 /// An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file
123 /// Memory block containing the block buffers consecutively
124 unsigned char *fBufferSpace;
125 /// The cached file size
126 std::uint64_t fFileSize;
127 /// Files are opened lazily and only when required; the open state is kept by this flag
129
130protected:
131 std::string fUrl;
133 /// The current position in the file, which can be changed by Seek, Read, and Readln
134 std::uint64_t fFilePos;
135
136 /**
137 * OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl(). If
138 * fOptions.fBlockSize is negative, derived classes are responsible for setting a sensible value. After a call to
139 * OpenImpl(), fOptions.fBlockSize must be greater than or equal to zero.
140 */
141 virtual void OpenImpl() = 0;
142 /**
143 * Derived classes should implement low-level reading without buffering. Short reads indicate the end of the file,
144 * therefore derived classes should return nbytes bytes if available.
145 */
146 virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset) = 0;
147 /// Derived classes should return the file size or kUnknownFileSize
148 virtual std::uint64_t GetSizeImpl() = 0;
149
150 /// If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented, too.
151 /// The default implementation throws an error.
152 virtual void *MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
153 /// Derived classes with mmap support must be able to unmap the memory area handed out by Map()
154 virtual void UnmapImpl(void *region, size_t nbytes);
155
156 /// By default implemented as a loop of ReadAt calls but can be overridden, e.g. by the XRootD or DAVIX implementations
157 virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq);
158
159 /// Open the file if not already open. Otherwise noop.
160 void EnsureOpen();
161
162public:
163 RRawFile(std::string_view url, ROptions options);
164 RRawFile(const RRawFile &) = delete;
165 RRawFile &operator=(const RRawFile &) = delete;
166 virtual ~RRawFile();
167
168 /// Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
169 virtual std::unique_ptr<RRawFile> Clone() const = 0;
170
171 /// Factory method that returns a suitable concrete implementation according to the transport in the url
172 static std::unique_ptr<RRawFile> Create(std::string_view url, ROptions options = ROptions());
173 /// Returns only the file location, e.g. "server/file" for http://server/file
174 static std::string GetLocation(std::string_view url);
175 /// Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file
176 static std::string GetTransport(std::string_view url);
177
178 /**
179 * Buffered read from a random position. Returns the actual number of bytes read.
180 * Short reads indicate the end of the file
181 */
182 size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset);
183 /// Read from fFilePos offset. Returns the actual number of bytes read.
184 size_t Read(void *buffer, size_t nbytes);
185 /// Change the cursor fFilePos
186 void Seek(std::uint64_t offset);
187 /// Returns the offset for the next Read/Readln call
188 std::uint64_t GetFilePos() const { return fFilePos; }
189 /// Returns the size of the file
190 std::uint64_t GetSize();
191 /// Returns the url of the file
192 std::string GetUrl() const;
193
194 /// Opens the file if necessary and calls ReadVImpl
195 void ReadV(RIOVec *ioVec, unsigned int nReq);
196 /// Returns the limits regarding the ioVec input to ReadV for this specific file; may open the file as a side-effect.
198
199 /// Memory mapping according to POSIX standard; in particular, new mappings of the same range replace older ones.
200 /// Mappings need to be aligned at page boundaries, therefore the real offset can be smaller than the desired value.
201 /// Users become owner of the address returned by Map() and are responsible for calling Unmap() with the full length.
202 void *Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
203 /// Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping
204 void Unmap(void *region, size_t nbytes);
205
206 /// Derived classes shall inform the user about the supported functionality, which can possibly depend
207 /// on the file at hand
208 virtual int GetFeatures() const = 0;
209
210 /// Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
211 bool Readln(std::string &line);
212
213 /// Once opened, the file stays open until destruction of the RRawFile object
214 bool IsOpen() const { return fIsOpen; }
215}; // class RRawFile
216
217} // namespace Internal
218} // namespace ROOT
219
220#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
virtual std::uint64_t GetSizeImpl()=0
Derived classes should return the file size or kUnknownFileSize.
unsigned char * fBufferSpace
Memory block containing the block buffers consecutively.
Definition RRawFile.hxx:124
std::uint64_t GetFilePos() const
Returns the offset for the next Read/Readln call.
Definition RRawFile.hxx:188
static std::string GetLocation(std::string_view url)
Returns only the file location, e.g. "server/file" for http://server/file.
Definition RRawFile.cxx:126
unsigned int fBlockBufferIdx
To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers.
Definition RRawFile.hxx:120
RRawFile & operator=(const RRawFile &)=delete
std::uint64_t fFilePos
The current position in the file, which can be changed by Seek, Read, and Readln.
Definition RRawFile.hxx:134
virtual void * MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented,...
Definition RRawFile.cxx:108
static constexpr int kFeatureHasMmap
Map() and Unmap() are implemented.
Definition RRawFile.hxx:54
void Unmap(void *region, size_t nbytes)
Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping.
Definition RRawFile.cxx:259
virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq)
By default implemented as a loop of ReadAt calls but can be overwritten, e.g. XRootD or DAVIX impleme...
Definition RRawFile.cxx:114
static std::string GetTransport(std::string_view url)
Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file.
Definition RRawFile.cxx:148
static constexpr unsigned int kNumBlockBuffers
Don't change without adapting ReadAt()
Definition RRawFile.hxx:102
std::uint64_t GetSize()
Returns the size of the file.
Definition RRawFile.cxx:134
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Definition RRawFile.cxx:73
void Seek(std::uint64_t offset)
Change the cursor fFilePos.
Definition RRawFile.cxx:254
static constexpr std::uint64_t kUnknownFileSize
Derived classes do not necessarily need to provide file size information but they can return "not kno...
Definition RRawFile.hxx:46
virtual RIOVecLimits GetReadVLimits()
Returns the limits regarding the ioVec input to ReadV for this specific file; may open the file as a ...
Definition RRawFile.hxx:197
ELineBreaks
kAuto detects the line break from the first line, kSystem picks the system's default
Definition RRawFile.hxx:48
size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset)
Buffered read from a random position.
Definition RRawFile.cxx:171
bool fIsOpen
Files are opened lazily and only when required; the open state is kept by this flag.
Definition RRawFile.hxx:128
virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset)=0
Derived classes should implement low-level reading without buffering.
bool Readln(std::string &line)
Read the next line starting from the current value of fFilePos. Returns false if the end of the file ...
Definition RRawFile.cxx:221
RRawFile(const RRawFile &)=delete
void EnsureOpen()
Open the file if not already open. Otherwise noop.
Definition RRawFile.cxx:99
bool IsOpen() const
Once opened, the file stays open until destruction of the RRawFile object.
Definition RRawFile.hxx:214
virtual std::unique_ptr< RRawFile > Clone() const =0
Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
void ReadV(RIOVec *ioVec, unsigned int nReq)
Opens the file if necessary and calls ReadVImpl.
Definition RRawFile.cxx:215
static constexpr int kFeatureHasSize
GetSize() does not return kUnknownFileSize.
Definition RRawFile.hxx:52
virtual void OpenImpl()=0
OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl().
size_t Read(void *buffer, size_t nbytes)
Read from fFilePos offset. Returns the actual number of bytes read.
Definition RRawFile.cxx:164
static constexpr int kFeatureHasAsyncIo
File supports async IO.
Definition RRawFile.hxx:56
std::string GetUrl() const
Returns the url of the file.
Definition RRawFile.cxx:144
std::uint64_t fFileSize
The cached file size.
Definition RRawFile.hxx:126
RBlockBuffer fBlockBuffers[kNumBlockBuffers]
An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in ...
Definition RRawFile.hxx:122
virtual void UnmapImpl(void *region, size_t nbytes)
Derived classes with mmap support must be able to unmap the memory area handed out by Map()
Definition RRawFile.cxx:121
virtual int GetFeatures() const =0
Derived classes shall inform the user about the supported functionality, which can possibly depend on...
void * Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
Memory mapping according to POSIX standard; in particular, new mappings of the same range replace old...
Definition RRawFile.cxx:158
TLine * line
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t fBufferOffset
Where in the open file does fBuffer start.
Definition RRawFile.hxx:105
RBlockBuffer(const RBlockBuffer &)=delete
unsigned char * fBuffer
Points into the I/O buffer with data from the file, not owned.
Definition RRawFile.hxx:109
size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset)
Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copie...
Definition RRawFile.cxx:46
RBlockBuffer & operator=(const RBlockBuffer &)=delete
size_t fBufferSize
The number of currently buffered bytes in fBuffer.
Definition RRawFile.hxx:107
Implementations may enforce limits on the use of vector reads.
Definition RRawFile.hxx:85
std::size_t fMaxSingleSize
Maximum size in bytes of any single request in the request vector.
Definition RRawFile.hxx:89
std::uint64_t fMaxTotalSize
Maximum size in bytes of the sum of requests in the vector.
Definition RRawFile.hxx:91
std::size_t fMaxReqs
Maximum number of elements in a ReadV request vector.
Definition RRawFile.hxx:87
Used for vector reads from multiple offsets into multiple buffers.
Definition RRawFile.hxx:71
std::size_t fOutBytes
The number of actually read bytes, set by ReadV()
Definition RRawFile.hxx:79
std::size_t fSize
The number of desired bytes.
Definition RRawFile.hxx:77
void * fBuffer
The destination for reading.
Definition RRawFile.hxx:73
std::uint64_t fOffset
The file offset.
Definition RRawFile.hxx:75
On construction, an ROptions parameter can customize the RRawFile behavior.
Definition RRawFile.hxx:59
int fBlockSize
Read at least fBlockSize bytes at a time.
Definition RRawFile.hxx:65