Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RCurlConnection.cxx
Go to the documentation of this file.
1// @(#)root/net:$Id$
2// Author: Jakob Blomer
3
5#include "ROOT/RError.hxx"
6#include "ROOT/RLogger.hxx"
7#include "ROOT/RVersion.hxx"
8
9#include <TError.h>
10#include <TSystem.h>
11
12#include <algorithm>
13#include <cassert>
14#include <cctype>
15#include <cerrno>
16#include <cstddef>
17#include <cstdlib>
18#include <cstring>
19#include <limits>
20#include <numeric>
21#include <stdexcept>
22#include <string>
23#include <utility>
24
25#include <curl/curl.h>
26
27#if LIBCURL_VERSION_NUM >= 0x078300
28#define HAS_CURL_EASY_HEADER
29#endif
30
31#if LIBCURL_VERSION_NUM >= 0x078000
32#define HAS_CURL_URL_STRERROR
33#endif
34
35namespace {
36
// HTTP status codes that get special treatment by the client. The "success class" is the status code
// divided by 100, i.e. it covers all 2xx responses.
constexpr int kHttpResponseSuccessClass = 2;
constexpr int kHttpResponsePartial = 206;
constexpr int kHttpResponseBadRequest = 400;
constexpr int kHttpResponseForbidden = 403;
constexpr int kHttpResponseNotFound = 404;
constexpr int kHttpResponseRangeNotSatisfiable = 416;

/// Maximum number of characters of debug HTTP content before snipping
constexpr int kMaxDebugDataChars = 50;
45
/// A byte range, given by its first and its last byte position, as specified in an HTTP range request header.
/// Both positions default to the largest 64bit value.
struct RHttpRange {
   std::uint64_t fFirstByte = std::uint64_t(-1);
   std::uint64_t fLastByte = std::uint64_t(-1);

   /// Renders the range in the form "FIRSTBYTE-LASTBYTE"
   std::string ToString() const
   {
      std::string text = std::to_string(fFirstByte);
      text += "-";
      text += std::to_string(fLastByte);
      return text;
   }
};
53
54/// Set up before curl_easy_perform and then passed to all the write callbacks as data is streamed from the
55/// web server.
56struct RTransferState {
57 /// The list of ranges in the order passed to SendRangesReq() and preprocessed by the range displacement,
58 /// i.e. the ranges are not overlapping (but may be empty or adjacent).
59 ROOT::Internal::RCurlConnection::RUserRange *fRanges = nullptr;
60 const std::vector<std::size_t> &fOrder; ///< Array of indices to iterate fRanges sorted by offset
61 CURL *fHandle = nullptr;
62
63 std::size_t fCurrentRange = 0; ///< Index of the user range into which incoming data is read (to be used with fOrder)
64 long fResponseCode = 0; ///< Set to the HTTP response code before the first data buffer is processed
65 std::size_t fNBytesProcessed = 0; ///< Total number of received and processed bytes (including skipped buffers)
66 bool fHasMultipartContent = false; ///< True if the server responded with HTTP 206
67 std::string fExtraMsg; ///< Information to be passed to the user through RCurlConnection::RStatus::fStatusMsg
68
69 // State for parsing the individual part headers of a multipart response
70 std::string fPartHeaders; ///< Header characters seen so far of the currently processed part
71 bool fInPartHeader = false; ///< Indicate if we are processing the content of the part of the headers
72 std::uint8_t fNEohCharsFound = 0; ///< Counts how many of the header end signature ("\r\n\r\n") we have already seen
73 std::size_t fNBytesPartRemain = 0; ///< Number of remaining bytes of the content of the current part
74
75#ifndef HAS_CURL_EASY_HEADER
76 bool fHasContentRangeHeader = false; ///< On older libcurl versions, we must parse the headers ourselves
77#endif
78
79 RTransferState(ROOT::Internal::RCurlConnection::RUserRange *ranges, const std::vector<std::size_t> &order,
80 CURL *handle)
81 : fRanges(ranges), fOrder(order), fHandle(handle)
82 {
83 }
84
85 ROOT::Internal::RCurlConnection::RUserRange &GetCurrentRange() const { return fRanges[fOrder[fCurrentRange]]; }
86
87 std::size_t GetNRanges() const { return fOrder.size(); }
88
89 /// Let fCurrentRange point to the next non-empty range (if any left)
90 void AdvanceRange()
91 {
92 do {
93 fCurrentRange++;
94 } while ((fCurrentRange < GetNRanges()) && (GetCurrentRange().fLength == 0));
95 }
96
97 bool IsPartial() const { return fResponseCode == kHttpResponsePartial; }
98};
99
100ROOT::RLogChannel &HTTPClientLog()
101{
102 static ROOT::RLogChannel sLog("ROOT.HTTPClient");
103 return sLog;
104}
105
106void EnsureCurlInitialized()
107{
108 static const auto kInitCode = curl_global_init(CURL_GLOBAL_DEFAULT);
109 if (kInitCode != CURLE_OK) {
110 // Cannot use GetCurlErrorString() because curl isn't initialized...
111 throw ROOT::RException(R__FAIL("cannot initialize curl library: " + std::to_string(kInitCode)));
112 }
113}
114
115const curl_version_info_data *GetCurlVersionInfo()
116{
117 static const curl_version_info_data *kVersionInfo = curl_version_info(CURLVERSION_NOW);
118 return kVersionInfo;
119}
120
121/// Return the information of a "content-range" header in a part of a MIME multi-part message.
122/// The headers input is the complete list of part headers.
123/// The content-range header has the form "content-range: FIRSTBYTE-LASTBYTE/LENGTH". We ignore LENGTH; the
124/// returned length is calculated from the range.
125ROOT::RResult<void> ParseContentRange(const std::string &headers, std::uint64_t &offset, std::uint64_t &length)
126{
127 // Note that headers start with a blank line, so the leading line break is always found.
128 static constexpr char kContentRangeHeader[16] = {'\r', '\n', 'c', 'o', 'n', 't', 'e', 'n',
129 't', '-', 'r', 'a', 'n', 'g', 'e', ':'};
130
131 auto it = std::search(headers.begin(), headers.end(), std::begin(kContentRangeHeader), std::end(kContentRangeHeader),
132 [](char a, char b) { return std::toupper(a) == std::toupper(b); });
133 if (it == headers.end()) {
134 return R__FAIL("cannot find 'content-range' header in multipart response");
135 }
136
137 std::string strBytePos[2]; // the first byte and the last byte of the range
138 int idxStrBytePos = 0;
139 it += sizeof(kContentRangeHeader);
140 do {
141 if (it == headers.end()) {
142 return R__FAIL("premature end of 'content-range' header in multipart response");
143 }
144
145 if (std::isspace(*it) || std::isalpha(*it)) {
146 it++;
147 continue;
148 }
149 if (*it == '-') {
150 if (idxStrBytePos == 1) {
151 return R__FAIL(std::string("invalid 'content-range' header: ") + headers);
152 }
153 idxStrBytePos++;
154 it++;
155 continue;
156 }
157 if (*it == '/')
158 break;
159
160 strBytePos[idxStrBytePos].push_back(*it);
161 it++;
162 } while (true);
163
164 if (strBytePos[0].empty() || strBytePos[1].empty())
165 return R__FAIL(std::string("invalid 'content-range' header: ") + headers);
166
167 char *end = nullptr;
168 offset = std::strtoull(strBytePos[0].c_str(), &end, 10);
169 if (errno == ERANGE)
170 return R__FAIL(std::string("invalid 'content-range' header: ") + headers);
171 auto lastBytePos = std::strtoull(strBytePos[1].c_str(), &end, 10);
172 if (errno == ERANGE)
173 return R__FAIL(std::string("invalid 'content-range' header: ") + headers);
174
175 if (lastBytePos < offset)
176 return R__FAIL(std::string("invalid 'content-range' header: ") + headers);
177
178 length = lastBytePos - offset + 1;
179 if (length > std::numeric_limits<std::size_t>::max()) {
180 return R__FAIL(std::string("part of multipart response too big: ") + std::to_string(length) + "B");
181 }
182
184}
185
186/// Process the buffer 'data' of length 'nbytes' that was received from the server as part of a MIME multipart response
187std::size_t ProcessMultipartData(char *data, std::size_t nbytes, RTransferState *transfer)
188{
189 static constexpr char kEndOfHeaders[4] = {'\r', '\n', '\r', '\n'}; // Headers always end with a blank line
190
191 std::size_t nbytesRemain = nbytes;
192
193 while (nbytesRemain > 0) {
194 if (transfer->fInPartHeader) {
195 transfer->fPartHeaders.push_back(*data);
196 if (*data == kEndOfHeaders[transfer->fNEohCharsFound]) {
197 transfer->fNEohCharsFound++;
198 if (transfer->fNEohCharsFound == sizeof(kEndOfHeaders)) {
199 transfer->fInPartHeader = false;
200 transfer->fNEohCharsFound = 0;
201 }
202 } else {
203 transfer->fNEohCharsFound = 0;
204 }
205 data++;
206 nbytesRemain--;
207
208 if (transfer->fInPartHeader)
209 continue;
210
211 // Transition from part header to part content
212 std::uint64_t partOffset = 0;
213 std::uint64_t length = 0;
214 auto result = ParseContentRange(transfer->fPartHeaders, partOffset, length);
215 transfer->fPartHeaders.clear();
216 if (!result) {
217 transfer->fExtraMsg = result.GetError()->GetReport();
218 return 0;
219 }
220 // By construction, at this point we have length > 0 and we know its small enough for fNBytesPartRemain
221 transfer->fNBytesPartRemain = length;
222
223 // Find matching range: the parts of a multipart response may come in any order (although unusual), so
224 // we need to find the corresponing range. The ranges at this point must be non-overlapping, so we can safely
225 // match on offset.
226 std::size_t i = 0;
227 for (; i < transfer->GetNRanges(); ++i) {
228 const auto &r = transfer->GetCurrentRange();
229 if ((r.fLength > 0) && (r.fOffset == partOffset))
230 break;
231 transfer->fCurrentRange = (transfer->fCurrentRange + 1) % transfer->GetNRanges();
232 }
233 if (i == transfer->GetNRanges()) {
234 transfer->fExtraMsg = std::string("unexpected part with offset ") + std::to_string(partOffset);
235 return 0;
236 }
237 } // end of part header parsing
238
239 // Process part content
240 auto &range = transfer->GetCurrentRange();
241 const std::size_t nbytesCopy =
242 std::min({nbytesRemain, transfer->fNBytesPartRemain, range.fLength - range.fNBytesRecv});
243 if (nbytesCopy > 0) {
244 memcpy(range.fDestination + range.fNBytesRecv, data, nbytesCopy);
245 }
246 data += nbytesCopy;
247 range.fNBytesRecv += nbytesCopy;
248 nbytesRemain -= nbytesCopy;
249 transfer->fNBytesPartRemain -= nbytesCopy;
250
251 if (transfer->fNBytesPartRemain == 0) {
252 // End of this part, back to parsing headers of the next part
253 transfer->AdvanceRange(); // not strictly necessary but speeds up the range matching if parts come in order
254 transfer->fInPartHeader = true;
255 } else if (range.fNBytesRecv == range.fLength) {
256 // coalesced adjacent ranges, move on to the next range in the sorted array
257 transfer->AdvanceRange();
258 if (transfer->fCurrentRange == transfer->GetNRanges()) {
259 transfer->fExtraMsg = std::string("received range too long");
260 return 0;
261 }
262 }
263 }
264
265 transfer->fNBytesProcessed += nbytes;
266 return nbytes;
267}
268
269/// Process the buffer 'data' of length 'nbytes' that was received from the server as part standard HTTP 200 response,
270/// i.e. the server ignored the range request.
271std::size_t ProcessRawData(char *data, std::size_t nbytes, RTransferState *transfer)
272{
273 std::size_t nbytesRemain = nbytes;
274
275 while ((nbytesRemain > 0) && (transfer->fCurrentRange < transfer->GetNRanges())) {
276 auto &range = transfer->GetCurrentRange();
277
278 std::size_t nbytesSkip = 0;
279 if (transfer->fNBytesProcessed < range.fOffset) {
280 // Skip the first part of the data pointer that is not yet part of the requested range at hand
281 nbytesSkip = std::min(static_cast<std::uint64_t>(nbytesRemain), range.fOffset - transfer->fNBytesProcessed);
282 }
283
284 const std::size_t nbytesCopy = std::min(nbytesRemain - nbytesSkip, range.fLength - range.fNBytesRecv);
285 if (nbytesCopy > 0) {
286 // The received buffer overlaps with the current range
288 }
289
290 range.fNBytesRecv += nbytesCopy;
291 if (range.fNBytesRecv == range.fLength)
292 transfer->AdvanceRange();
293
294 nbytesRemain -= nbytesSkip + nbytesCopy;
295 data += nbytesSkip + nbytesCopy;
296 transfer->fNBytesProcessed += nbytesSkip + nbytesCopy;
297 }
298 transfer->fNBytesProcessed += nbytesRemain;
299
300 return nbytes;
301}
302
303/// Called by libcurl as data arrives from the web server. The data buffer may be empty. This callback runs
304/// possibly repeatadly after the response headers are processed.
305std::size_t CallbackData(char *data, std::size_t size, std::size_t nmemb, void *userdata)
306{
307 std::size_t nbytes = size * nmemb;
308 if (nbytes == 0)
309 return 0;
310
311 RTransferState *transfer = static_cast<RTransferState *>(userdata);
312
313 // Four possible successful responses:
314 // 1) Full document, server ignores the ranges in the request
315 // 2) Partial repsonse (206) with a single returned range (note that we already coalesced adjacent ranges
316 // in the request): returns the requested byte range (or shorter, if requested range goes past EOF)
317 // 3) Partial repsonse (206) for multiple requested ranges: multipart MIME message;
318 // parts can come in any order (but usually they should come in the order of the requested ranges).
319 // Requested ranges that are outside the size of the remote resource are ignored.
320 // 4) All ranges are outside the remote resource size: 416 (unsatisfiable request)
321
322 if (transfer->fResponseCode == 0) {
323 // Only called here the first time before any data of the reponse is processed
324 auto rc = curl_easy_getinfo(transfer->fHandle, CURLINFO_RESPONSE_CODE, &transfer->fResponseCode);
325 R__ASSERT(rc == CURLE_OK);
326
327 if (transfer->IsPartial()) {
328 // Check for the content-range header which will be present only if a single range was returned.
329#ifdef HAS_CURL_EASY_HEADER
330 curl_header *h = nullptr;
331 transfer->fHasMultipartContent =
332 curl_easy_header(transfer->fHandle, "content-range", 0, CURLH_HEADER, -1, &h) != CURLHE_OK;
333#else
334 transfer->fHasMultipartContent = !transfer->fHasContentRangeHeader;
335#endif
336 if (transfer->fHasMultipartContent) {
337 transfer->fInPartHeader = true;
338 } else {
339 // A range request for a single range must return it precisely (it can only cut at the end for EOF)
340 transfer->fNBytesProcessed = transfer->GetCurrentRange().fOffset;
341 }
342 }
343 }
344
345 if (transfer->fResponseCode / 100 != kHttpResponseSuccessClass) {
346 // ignore the HTTP error message body
347 return nbytes;
348 }
349
350 if (transfer->fHasMultipartContent)
351 return ProcessMultipartData(data, nbytes, transfer);
352
353 return ProcessRawData(data, nbytes, transfer);
354}
355
356#ifndef HAS_CURL_EASY_HEADER
357/// TODO(jblomer): remove me when we can require libcurl >= 7.83. Used to remember if a "content-range" header was seen
358std::size_t CallbackHeader(char *buffer, std::size_t size, std::size_t nitems, void *userdata)
359{
360 std::size_t nbytes = size * nitems;
361 if (nbytes == 0)
362 return 0;
363
364 RTransferState *transfer = static_cast<RTransferState *>(userdata);
365
366 std::string headerLine(buffer, nbytes);
367 std::transform(headerLine.begin(), headerLine.end(), headerLine.begin(), ::toupper);
368 if (headerLine.rfind("CONTENT-RANGE:", 0) == 0) {
369 transfer->fHasContentRangeHeader = true;
370 }
371
372 return nbytes;
373}
374#endif
375
376int CallbackDebug(CURL * /* handle */, curl_infotype type, char *data, size_t size, void * /* clientp */)
377{
378 std::string prefix = "(libcurl debug) ";
379 switch (type) {
380 case CURLINFO_TEXT: prefix += "{info} "; break;
381 case CURLINFO_HEADER_IN: prefix += "{header/recv} "; break;
382 case CURLINFO_HEADER_OUT: prefix += "{header/sent} "; break;
383 case CURLINFO_DATA_IN: prefix += "{data/recv} "; break;
384 case CURLINFO_DATA_OUT: prefix += "{data/sent} "; break;
385 case CURLINFO_SSL_DATA_IN: prefix += "{ssldata/recv} "; break;
386 case CURLINFO_SSL_DATA_OUT: prefix += "{ssldata/sent} "; break;
387 default: break;
388 }
389
390 switch (type) {
391 case CURLINFO_DATA_IN:
392 case CURLINFO_DATA_OUT:
393 case CURLINFO_SSL_DATA_IN:
394 case CURLINFO_SSL_DATA_OUT:
395 if (size > kMaxDebugDataChars) {
396 Info("RCurlConnection", "%s <snip>", prefix.c_str());
397 return 0;
398 }
399 default: break;
400 }
401
402 std::string msg(data, size);
403 bool isPrintable = true;
404
405 for (std::size_t i = 0; i < msg.length(); ++i) {
406 if (msg[i] == '\0') {
407 msg[i] = '~';
408 }
409
410 if ((msg[i] < ' ' || msg[i] > '~') && (msg[i] != 10 /*line feed*/ && msg[i] != 13 /*carriage return*/)) {
411 isPrintable = false;
412 break;
413 }
414 }
415
416 if (!isPrintable) {
417 msg = "<Non-plaintext sequence>";
418 }
419 Info("RCurlConnection", "%s%s", prefix.c_str(), msg.c_str());
420 return 0;
421}
422
423// From the list of non-overlapping (displaced) ranges, create the HTTP request ranges, ordered by offset.
424// Skip empty user ranges and coalesce adjacent user ranges.
425std::vector<RHttpRange>
426CreateRequestRanges(const ROOT::Internal::RCurlConnection::RUserRange *ranges, const std::vector<std::size_t> &order)
427{
428 const auto N = order.size();
429 std::vector<RHttpRange> result;
430
431 std::uint64_t rangeBegin = 0;
432 std::uint64_t rangeEnd = 0;
433 for (std::size_t i = 0; i < N; ++i) {
434 const auto &r = ranges[order[i]];
435
436 if (r.fLength == 0)
437 continue;
438
439 if (r.fOffset == rangeEnd) {
440 // Merge adjacent ranges into a single request range
441 rangeEnd = r.fOffset + r.fLength;
442 continue;
443 }
444
445 // Emit previous range
446 if (rangeEnd > 0)
447 result.emplace_back(RHttpRange{rangeBegin, rangeEnd - 1});
448
449 // Open new range
450 rangeBegin = r.fOffset;
451 rangeEnd = r.fOffset + r.fLength;
452 }
453 if (rangeEnd > 0) {
454 result.emplace_back(RHttpRange{rangeBegin, rangeEnd - 1});
455 }
456
457 return result;
458}
459
460/// For overlapping ranges, the displacement moves the offset of the rear request to the end of the front one.
461/// E.g., for the following situation with ranges
462///
463/// |---RANGE A---|
464/// |--------RANGE B-------|
465/// |--RANGE C--|
466/// |---------RANGE D----------|
467///
468/// the displacement will move the offsets of ranges B, C, D so that the result is
469/// |---RANGE A---|---RANGE B---|-RANGE D-|
470/// * (RANGE C, zero-sized)
471///
472/// Ranges fully contained in previous ranges end up zero-sized after displacement.
473/// Note that the range that contains the prefix of a range at hand is not necessarily the immedate predecessor.
474///
475/// Returns the additions to the offsets of the passed ranges. The first range always has a displacement of zero,
476/// so in principle the returned vector could be of size N - 1. But it seems simpler to keep it same sized.
477/// In the retured vector, index $k$ belongs to ranges[order[k]].
478std::vector<std::size_t>
479CreateAndApplyDisplacements(ROOT::Internal::RCurlConnection::RUserRange *ranges, const std::vector<std::size_t> &order)
480{
481 const auto N = order.size();
482 std::vector<std::size_t> displacements(N);
483 for (std::size_t i = 1; i < N; ++i) {
484 // ranges 0 .. i - 1 are already non-overlapping; check from i onwards if the ranges overlap with range i - 1.
485 const auto &prevRange = ranges[order[i - 1]];
486 if (prevRange.fLength == 0)
487 continue;
488
489 const auto prevLastByte = prevRange.fOffset + prevRange.fLength - 1;
490 // Quadratic complexity for pathological cases only,
491 // which can be easily fixed by better preprocessing of the ranges
492 for (auto j = i; j < N; ++j) {
493 auto &thisRange = ranges[order[j]];
494 if (thisRange.fOffset > prevLastByte)
495 break;
496 auto displacement =
497 std::min(static_cast<std::uint64_t>(thisRange.fLength), prevLastByte - thisRange.fOffset + 1);
498 // As we move the offset, we may break the sorting. Violations of the sort order, however, can only
499 // take place for zero-length displaced ranges, i.e. ranges fully contained within previous ranges.
500 // They are ignored during the data callback, so the sort order violation doesn't matter.
501 thisRange.fOffset += displacement;
502 thisRange.fDestination += displacement;
503 thisRange.fLength -= displacement;
504 displacements[j] += displacement;
505 }
506 }
507 return displacements;
508}
509
510/// After the HTTP transfer, all the data from the user ranges have been copied into ranges buffers once.
511/// In the process of reversing the displacements, overlapping ranges are copied from the containing ranges
512/// buffer into the displaced buffer sections. We always restore the originally provided information in ranges,
513/// but we only actually copy data if the request was successful.
514void ReverseDisplacements(std::vector<std::size_t> &displacements, ROOT::Internal::RCurlConnection::RUserRange *ranges,
515 const std::vector<std::size_t> &order, bool copyBuffers)
516{
517 const auto N = order.size();
518 for (std::size_t i = 1; i < N; ++i) {
519 if (displacements[i] == 0)
520 continue;
521
522 auto &thisRange = ranges[order[i]];
523 std::size_t j = i - 1;
524 do {
525 // We go step by step through the previous ranges (which are already reversed) to copy the information.
526 const auto &prevRange = ranges[order[j]];
527 if ((prevRange.fLength == 0) || (prevRange.fOffset + prevRange.fLength < thisRange.fOffset)) {
528 j--;
529 continue;
530 }
531 const std::size_t nbytesReverse =
532 std::min(static_cast<std::uint64_t>(displacements[i]), thisRange.fOffset - prevRange.fOffset);
533 thisRange.fOffset -= nbytesReverse;
534 thisRange.fDestination -= nbytesReverse;
535 thisRange.fLength += nbytesReverse;
536 displacements[i] -= nbytesReverse;
537
538 // The previous range has not been fully filled if it goes past the size of the remote resource.
539 // In this case, the current range may also only be partially filled.
540 std::size_t nbytesRecvFromPrev = 0;
541 if (prevRange.fOffset + prevRange.fNBytesRecv >= thisRange.fOffset) {
542 nbytesRecvFromPrev = std::min(prevRange.fOffset + prevRange.fNBytesRecv - thisRange.fOffset,
543 static_cast<std::uint64_t>(nbytesReverse));
544 }
545
546 thisRange.fNBytesRecv += nbytesRecvFromPrev;
547 if (copyBuffers && (nbytesRecvFromPrev > 0)) {
548 memcpy(thisRange.fDestination, prevRange.fDestination + (thisRange.fOffset - prevRange.fOffset),
549 nbytesRecvFromPrev);
550 }
551 } while (displacements[i]);
552 }
553}
554
555std::string GetCurlErrorString(CURLcode code)
556{
557 return std::string(curl_easy_strerror(code)) + " (" + std::to_string(code) + ")";
558}
559
560std::string GetCurlUrlErrorString(CURLUcode code)
561{
562#ifdef HAS_CURL_URL_STRERROR
563 return std::string(curl_url_strerror(code)) + " (" + std::to_string(code) + ")";
564#else
565 return std::string("libcurl too old for mapping error number to text") + " (" + std::to_string(code) + ")";
566#endif
567}
568
569std::string GetUserAgentString()
570{
571 SysInfo_t s;
572 gSystem->GetSysInfo(&s);
573
574 auto curlVersionInfo = GetCurlVersionInfo();
575
576 return std::string("ROOT/v") + ROOT_RELEASE + " (" + std::string(s.fOS) + ") curl/" + curlVersionInfo->version +
577 " " + curlVersionInfo->ssl_version;
578}
579
580} // anonymous namespace
581
583{
584 return LIBCURL_VERSION_NUM;
585}
586
588{
589 EnsureCurlInitialized();
590
591 fHandle = curl_easy_init();
592 if (!fHandle) {
593 throw RException(R__FAIL("cannot initialize curl handle"));
594 }
595
597 SetOptions();
598
599 auto result = SetUrl(url);
600 if (!result) {
601 curl_easy_cleanup(fHandle);
602 result.Throw();
603 }
604}
605
607{
608 if (fHandle)
609 curl_easy_cleanup(fHandle);
610}
611
613{
614 std::swap(fHandle, other.fHandle);
615 std::swap(fCredentials, other.fCredentials);
617}
618
619ROOT::Internal::RCurlConnection &ROOT::Internal::RCurlConnection::RCurlConnection::operator=(RCurlConnection &&other)
620{
621 if (this == &other)
622 return *this;
623 fHandle = other.fHandle;
624 other.fHandle = nullptr;
625 fCredentials = std::move(other.fCredentials);
626 SetupErrorBuffer();
627 return *this;
628}
629
631{
632 if (!fErrorBuffer)
633 fErrorBuffer = std::make_unique<char[]>(CURL_ERROR_SIZE);
634 auto rc = curl_easy_setopt(fHandle, CURLOPT_ERRORBUFFER, fErrorBuffer.get());
635 R__ASSERT(rc == CURLE_OK);
636}
637
639{
640 int rc;
641
642 if (gDebug) {
643 rc = curl_easy_setopt(fHandle, CURLOPT_VERBOSE, 1);
644 R__ASSERT(rc == CURLE_OK);
645 rc = curl_easy_setopt(fHandle, CURLOPT_DEBUGFUNCTION, CallbackDebug);
646 R__ASSERT(rc == CURLE_OK);
647 } else {
648 rc = curl_easy_setopt(fHandle, CURLOPT_VERBOSE, 0);
649 R__ASSERT(rc == CURLE_OK);
650 }
651
652 static const std::string kUserAgent = GetUserAgentString();
653 rc = curl_easy_setopt(fHandle, CURLOPT_USERAGENT, kUserAgent.c_str());
654 R__ASSERT(rc == CURLE_OK);
655
656 rc = curl_easy_setopt(fHandle, CURLOPT_FOLLOWLOCATION, 1);
657 R__ASSERT(rc == CURLE_OK);
658
659 rc = curl_easy_setopt(fHandle, CURLOPT_WRITEFUNCTION, CallbackData);
660 R__ASSERT(rc == CURLE_OK);
661}
662
664{
665 CURLU *cu = curl_url();
666 R__ASSERT(cu);
667 auto rc = curl_url_set(cu, CURLUPART_URL, url.c_str(), CURLU_URLENCODE);
668 if (rc != CURLUE_OK) {
669 curl_url_cleanup(cu);
670 return R__FAIL(std::string("invalid URL: ") + std::string(url) + " [" + GetCurlUrlErrorString(rc) + "]");
671 }
672
673 char *escaped_url = nullptr;
674 rc = curl_url_get(cu, CURLUPART_URL, &escaped_url, CURLU_NO_DEFAULT_PORT);
675 curl_url_cleanup(cu);
676 if (rc != CURLUE_OK) {
677 return R__FAIL(std::string("URL escape error: ") + std::string(url) + " [" + GetCurlUrlErrorString(rc) + "]");
678 }
679
680 fEscapedUrl = escaped_url;
681
682 auto rcOpt = curl_easy_setopt(fHandle, CURLOPT_URL, escaped_url);
683 curl_free(escaped_url);
684 if (rcOpt != CURLE_OK) {
685 return R__FAIL("cannot set URL: " + GetCurlErrorString(rcOpt));
686 }
687
688 return RResult<void>::Success();
689}
690
692{
693 auto rc = curl_easy_perform(fHandle);
694
695// CURLE_TOO_LARGE is available as of curl version 8.6.0
696#ifdef CURLE_TOO_LARGE
697 if (rc == CURLE_TOO_LARGE) {
698#else
699 if (rc == CURLE_OUT_OF_MEMORY) {
700#endif
701 // The ranges don't even fit in the request header
703 } else if (rc != CURLE_OK) {
704 status.fStatusMsg = fErrorBuffer.get();
705 status.fStatusMsg += " [" + GetCurlErrorString(rc) + "]";
706
707 long osErrNo = 0;
708 rc = curl_easy_getinfo(fHandle, CURLINFO_OS_ERRNO, &osErrNo);
709 if (rc == CURLE_OK)
710 status.fStatusMsg += " (OS errno: " + std::to_string(osErrNo) + ")";
711 } else {
712 long responseCode = 0;
713 rc = curl_easy_getinfo(fHandle, CURLINFO_RESPONSE_CODE, &responseCode);
714 R__ASSERT(rc == CURLE_OK);
715 if ((responseCode / 100 == kHttpResponseSuccessClass) || (responseCode == kHttpResponseRangeNotSatisfiable)) {
716 // Requests past the size of the remote resource are considered valid. They simply receive zero bytes.
718 } else if (responseCode == kHttpResponseNotFound) {
720 } else if (responseCode == kHttpResponseBadRequest) {
722 } else if (responseCode == kHttpResponseForbidden) {
724 } else {
726 }
727 }
728}
729
731{
732 remoteSize = kUnknownSize;
733
734 auto rc = curl_easy_setopt(fHandle, CURLOPT_NOBODY, 1);
735 R__ASSERT(rc == CURLE_OK);
736 rc = curl_easy_setopt(fHandle, CURLOPT_RANGE, NULL); // may have been set by a previous SendRangesReq() on the object
737 R__ASSERT(rc == CURLE_OK);
738
739#ifndef HAS_CURL_EASY_HEADER
740 rc = curl_easy_setopt(fHandle, CURLOPT_HEADERFUNCTION, NULL);
741 R__ASSERT(rc == CURLE_OK);
742 rc = curl_easy_setopt(fHandle, CURLOPT_HEADERDATA, NULL);
743 R__ASSERT(rc == CURLE_OK);
744#endif
745
746 RStatus status;
747 Perform(status);
748 if (status) {
749 curl_off_t length = -1;
750 rc = curl_easy_getinfo(fHandle, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &length);
751 if (rc == CURLE_OK && length >= 0)
752 remoteSize = length;
753 }
754
755 return status;
756}
757
760{
761 if (N == 0) {
762 // Pretend that we submitted a successful request
764 }
765
766 // Construct an array of indices that allows to iterate the ranges in order sorted by offset
767 std::vector<std::size_t> order(N);
768 std::iota(order.begin(), order.end(), 0);
769 std::sort(order.begin(), order.end(), [ranges](std::size_t a, std::size_t b) { return ranges[a] < ranges[b]; });
770
771 // Fixup overlapping ranges
772 auto displacements = CreateAndApplyDisplacements(ranges, order);
773
774 // Construct the consolidated HTTP ranges from the ordered and non-overlapping user ranges
775 const auto requestRanges = CreateRequestRanges(ranges, order);
776 if (requestRanges.empty()) {
777 // In this case, we know that we did not apply any displacements
779 }
780
781 auto rc = curl_easy_setopt(fHandle, CURLOPT_HTTPGET, 1);
782 R__ASSERT(rc == CURLE_OK);
783
784 RTransferState transfer(ranges, order, fHandle);
785 rc = curl_easy_setopt(fHandle, CURLOPT_WRITEDATA, &transfer);
786 R__ASSERT(rc == CURLE_OK);
787
788#ifndef HAS_CURL_EASY_HEADER
789 rc = curl_easy_setopt(fHandle, CURLOPT_HEADERFUNCTION, CallbackHeader);
790 R__ASSERT(rc == CURLE_OK);
791 rc = curl_easy_setopt(fHandle, CURLOPT_HEADERDATA, &transfer);
792 R__ASSERT(rc == CURLE_OK);
793#endif
794
795 RStatus status;
796 // There is no HTTP request to determine the maximum number of ranges that the web server can serve.
797 // Therefore, we try with all the ranges (or fMaxNRangesPerReqest, if explicitly set), and half that number
798 // as long as needed.
799 // If we need to reduce the number of ranges per requests and no limit was set,
800 // we will remember the working number for the next requests.
801 std::size_t batchSize = fMaxNRangesPerReqest ? fMaxNRangesPerReqest : requestRanges.size();
802 bool tryAgain;
803 do {
804 tryAgain = false;
805 // If we have multiple batches, we could in principle submit them concurrently using multiple connections
806 // (and CURL easy handles) that get pooled in a CURL multi handle.
807 // This is a potential future optimization.
808 for (std::size_t b = 0; b < requestRanges.size(); b += batchSize) {
809 const std::size_t nRanges = std::min(batchSize, requestRanges.size() - b);
810 std::string rangeHeader = requestRanges[b].ToString();
811 for (std::size_t i = 1; i < nRanges; ++i) {
812 rangeHeader += "," + requestRanges[b + i].ToString();
813 }
814 rc = curl_easy_setopt(fHandle, CURLOPT_RANGE, rangeHeader.c_str());
815 R__ASSERT(rc == CURLE_OK);
816
817 if (b > 0) {
818 const std::uint64_t lastByteRequested = requestRanges[b - 1].fLastByte;
819 // Advance all ranges that are already out of scope
820 // Note that we have to start at zero because the previous multi-part request may have visited the
821 // ranges in arbitrary order.
822 for (transfer.fCurrentRange = 0; transfer.fCurrentRange < N; transfer.fCurrentRange++) {
823 if (transfer.GetCurrentRange().fOffset > lastByteRequested)
824 break;
825 }
826 }
827
828 transfer.fResponseCode = 0; // reset HTTP response code for the next request
829 Perform(status);
830 if ((status.fStatusCode == RStatus::kTooManyRanges) && (batchSize > 1)) {
831 batchSize /= 2;
832 tryAgain = true;
833 break;
834 }
835 if (!status)
836 break;
837 }
838 } while (tryAgain);
839
840 if (status && (fMaxNRangesPerReqest == 0) && (batchSize < requestRanges.size()))
841 fMaxNRangesPerReqest = batchSize;
842
843 if (!transfer.fExtraMsg.empty()) {
844 status.fStatusMsg += "; extra information: " + transfer.fExtraMsg;
845 }
846
847 ReverseDisplacements(displacements, ranges, order, static_cast<bool>(status));
848
849 return status;
850}
851
853{
855
856 const std::string region = credentials.fRegion.empty() ? "default" : credentials.fRegion;
857 const std::string sigArg = std::string("aws:amz:") + region + ":s3";
858 auto rc = curl_easy_setopt(fHandle, CURLOPT_AWS_SIGV4, sigArg.c_str());
859 if (rc != CURLE_OK) {
860 throw RException(R__FAIL(std::string("cannot set CURLOPT_AWS_SIGV4: ") + GetCurlErrorString(rc)));
861 }
862
863 const std::string userPwd = credentials.fAccessKey + ":" + credentials.fSecretKey;
864 rc = curl_easy_setopt(fHandle, CURLOPT_USERPWD, userPwd.c_str());
865 if (rc != CURLE_OK) {
866 throw RException(R__FAIL(std::string("cannot set CURLOPT_USERPWD: ") + GetCurlErrorString(rc)));
867 }
868
869 fCredentials = std::make_unique<RHTTPCredentials>();
871 fCredentials->fData = credentials;
872}
873
875{
876 if (!fCredentials)
877 return;
878
879 CURLcode rc;
880 switch (fCredentials->fType) {
882 rc = curl_easy_setopt(fHandle, CURLOPT_AWS_SIGV4, NULL);
883 R__ASSERT(rc == CURLE_OK);
884 rc = curl_easy_setopt(fHandle, CURLOPT_USERPWD, NULL);
885 R__ASSERT(rc == CURLE_OK);
886 break;
887 default: R__ASSERT(false && "internal error: unknown credentials type");
888 }
889 fCredentials.reset();
890}
891
896
897/// Sets the credentials from process environment variables. Currently supported
898/// - S3_ACCESS_KEY, S3_SECRET_KEY, S3_REGION
899/// If the environment variables are not found, clear any credentials from the connection.
901{
903
904 const auto accessKey = std::getenv("S3_ACCESS_KEY");
905 if (accessKey && (accessKey[0] != '\0')) {
906 const auto secretKey = std::getenv("S3_SECRET_KEY");
907 if (!secretKey || (secretKey[0] == '\0')) {
908 R__LOG_WARNING(HTTPClientLog()) << "found S3_ACCESS_KEY environment variable but S3_SECRET_KEY unset. "
909 "Ignoring S3 credentials.";
910 return;
911 }
912 const auto region = std::getenv("S3_REGION");
913 SetCredentials(RS3Credentials{accessKey, secretKey, region ? region : ""});
914 }
915}
ROOT::R::TRInterface & r
Definition Object.C:4
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
static const char * ToString(ENameCycleError err)
Definition RFile.cxx:50
#define R__LOG_WARNING(...)
Definition RLogger.hxx:357
#define b(i)
Definition RSha256.hxx:100
#define a(i)
Definition RSha256.hxx:99
#define h(i)
Definition RSha256.hxx:106
#define ROOT_RELEASE
Definition RVersion.hxx:44
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
Definition TError.cxx:241
#define N
Int_t gDebug
Definition TROOT.cxx:777
externTSystem * gSystem
Definition TSystem.h:582
#define NULL
Definition ZInflate.c:15
Encapsulates a curl easy handle and provides an interface to send HTTP HEAD and (multi-)range queries...
void SetCredentials(const RS3Credentials &credentials)
void * fHandle
the CURL easy handle corresponding to this connection
std::string fEscapedUrl
The URL provided in the constructor escaped according to standard rules.
std::unique_ptr< char[]> fErrorBuffer
For use by libcurl.
RStatus SendRangesReq(std::size_t N, RUserRange *ranges)
Reads the given ranges from the remote resource.
static constexpr std::uint64_t kUnknownSize
Returned by SendHeadReq() if the HTTP response contains no content-length header.
std::size_t fMaxNRangesPerReqest
If set to zero, automatically adjust: try with all given ranges and as long as the number of ranges i...
RCurlConnection(const std::string &url)
RStatus SendHeadReq(std::uint64_t &remoteSize)
Checks if the resource exists and if it does, return the value of the content-length header as size.
static int GetCurlVersion()
Used for testing.
void SetCredentialsFromEnvironment()
Sets the credentials from process environment variables.
RResult< void > SetUrl(const std::string &url)
std::unique_ptr< RHTTPCredentials > fCredentials
EHTTPCredentialsType GetCredentialsType() const
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
A log configuration for a channel, e.g.
Definition RLogger.hxx:97
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
Double_t result(Double_t *x, Double_t *par)
Definition gr201_waves.C:37
BVH_ALWAYS_INLINE T length(const Vec< T, N > &v)
Definition vec.h:122
constexpr auto empty(const C &c) -> decltype(c.empty())
offset
Definition df105.py:216
TSpectrum2 * s
Definition peaks2.C:33
Return value for both HEAD and GET requests.
@ kTooManyRanges
should not get to the user; number of request ranges is automatically reduced as needed
Caller-provided byte-range of the remote resource together with a pointer to a buffer.
void * handle(void *ptr)
Definition threads.C:21