27#if LIBCURL_VERSION_NUM >= 0x078300
28#define HAS_CURL_EASY_HEADER
31#if LIBCURL_VERSION_NUM >= 0x078000
32#define HAS_CURL_URL_STRERROR
// Leading digit of the 2xx (success) HTTP status codes; compared against `responseCode / 100`.
37static constexpr int kHttpResponseSuccessClass = 2;
// HTTP 206 (Partial Content): the status that RTransferState::IsPartial() tests for.
38static constexpr int kHttpResponsePartial = 206;
// HTTP 400 (Bad Request).
39static constexpr int kHttpResponseBadRequest = 400;
// HTTP 403 (Forbidden).
40static constexpr int kHttpResponseForbidden = 403;
// HTTP 404 (Not Found).
41static constexpr int kHttpResponseNotFound = 404;
// HTTP 416 (Range Not Satisfiable); handled in the same branch as the 2xx codes when the response code is evaluated.
42static constexpr int kHttpResponseRangeNotSatisfiable = 416;
// Debug-callback data payloads larger than this many bytes are logged as "<snip>" instead of verbatim.
44static constexpr int kMaxDebugDataChars = 50;
48 std::uint64_t fFirstByte = std::uint64_t(-1);
49 std::uint64_t fLastByte = std::uint64_t(-1);
51 std::string
ToString()
const {
return std::to_string(fFirstByte) +
"-" + std::to_string(fLastByte); }
56struct RTransferState {
59 ROOT::Internal::RCurlConnection::RUserRange *fRanges =
nullptr;
60 const std::vector<std::size_t> &fOrder;
61 CURL *fHandle =
nullptr;
63 std::size_t fCurrentRange = 0;
64 long fResponseCode = 0;
65 std::size_t fNBytesProcessed = 0;
66 bool fHasMultipartContent =
false;
67 std::string fExtraMsg;
70 std::string fPartHeaders;
71 bool fInPartHeader =
false;
72 std::uint8_t fNEohCharsFound = 0;
73 std::size_t fNBytesPartRemain = 0;
75#ifndef HAS_CURL_EASY_HEADER
76 bool fHasContentRangeHeader =
false;
79 RTransferState(ROOT::Internal::RCurlConnection::RUserRange *ranges,
const std::vector<std::size_t> &order,
81 : fRanges(ranges), fOrder(order), fHandle(
handle)
85 ROOT::Internal::RCurlConnection::RUserRange &GetCurrentRange()
const {
return fRanges[fOrder[fCurrentRange]]; }
87 std::size_t GetNRanges()
const {
return fOrder.size(); }
94 }
while ((fCurrentRange < GetNRanges()) && (GetCurrentRange().fLength == 0));
97 bool IsPartial()
const {
return fResponseCode == kHttpResponsePartial; }
106void EnsureCurlInitialized()
108 static const auto kInitCode = curl_global_init(CURL_GLOBAL_DEFAULT);
109 if (kInitCode != CURLE_OK) {
115const curl_version_info_data *GetCurlVersionInfo()
117 static const curl_version_info_data *kVersionInfo = curl_version_info(CURLVERSION_NOW);
125ROOT::RResult<void> ParseContentRange(
const std::string &headers, std::uint64_t &offset, std::uint64_t &length)
128 static constexpr char kContentRangeHeader[16] = {
'\r',
'\n',
'c',
'o',
'n',
't',
'e',
'n',
129 't',
'-',
'r',
'a',
'n',
'g',
'e',
':'};
131 auto it = std::search(headers.begin(), headers.end(), std::begin(kContentRangeHeader), std::end(kContentRangeHeader),
132 [](
char a,
char b) { return std::toupper(a) == std::toupper(b); });
133 if (it == headers.end()) {
134 return R__FAIL(
"cannot find 'content-range' header in multipart response");
137 std::string strBytePos[2];
138 int idxStrBytePos = 0;
139 it +=
sizeof(kContentRangeHeader);
141 if (it == headers.end()) {
142 return R__FAIL(
"premature end of 'content-range' header in multipart response");
145 if (std::isspace(*it) || std::isalpha(*it)) {
150 if (idxStrBytePos == 1) {
151 return R__FAIL(std::string(
"invalid 'content-range' header: ") + headers);
160 strBytePos[idxStrBytePos].push_back(*it);
164 if (strBytePos[0].
empty() || strBytePos[1].
empty())
165 return R__FAIL(std::string(
"invalid 'content-range' header: ") + headers);
168 offset = std::strtoull(strBytePos[0].c_str(), &end, 10);
170 return R__FAIL(std::string(
"invalid 'content-range' header: ") + headers);
171 auto lastBytePos = std::strtoull(strBytePos[1].c_str(), &end, 10);
173 return R__FAIL(std::string(
"invalid 'content-range' header: ") + headers);
175 if (lastBytePos < offset)
176 return R__FAIL(std::string(
"invalid 'content-range' header: ") + headers);
178 length = lastBytePos - offset + 1;
179 if (length > std::numeric_limits<std::size_t>::max()) {
180 return R__FAIL(std::string(
"part of multipart response too big: ") + std::to_string(length) +
"B");
187std::size_t ProcessMultipartData(
char *data, std::size_t nbytes, RTransferState *transfer)
189 static constexpr char kEndOfHeaders[4] = {
'\r',
'\n',
'\r',
'\n'};
191 std::size_t nbytesRemain = nbytes;
193 while (nbytesRemain > 0) {
194 if (transfer->fInPartHeader) {
195 transfer->fPartHeaders.push_back(*data);
196 if (*data == kEndOfHeaders[transfer->fNEohCharsFound]) {
197 transfer->fNEohCharsFound++;
198 if (transfer->fNEohCharsFound ==
sizeof(kEndOfHeaders)) {
199 transfer->fInPartHeader =
false;
200 transfer->fNEohCharsFound = 0;
203 transfer->fNEohCharsFound = 0;
208 if (transfer->fInPartHeader)
212 std::uint64_t partOffset = 0;
214 auto result = ParseContentRange(transfer->fPartHeaders, partOffset, length);
215 transfer->fPartHeaders.clear();
217 transfer->fExtraMsg =
result.GetError()->GetReport();
221 transfer->fNBytesPartRemain =
length;
227 for (; i < transfer->GetNRanges(); ++
i) {
228 const auto &
r = transfer->GetCurrentRange();
229 if ((
r.fLength > 0) && (
r.fOffset == partOffset))
231 transfer->fCurrentRange = (transfer->fCurrentRange + 1) % transfer->GetNRanges();
233 if (i == transfer->GetNRanges()) {
234 transfer->fExtraMsg = std::string(
"unexpected part with offset ") + std::to_string(partOffset);
240 auto &range = transfer->GetCurrentRange();
241 const std::size_t nbytesCopy =
243 if (nbytesCopy > 0) {
247 range.fNBytesRecv += nbytesCopy;
248 nbytesRemain -= nbytesCopy;
249 transfer->fNBytesPartRemain -= nbytesCopy;
251 if (transfer->fNBytesPartRemain == 0) {
253 transfer->AdvanceRange();
254 transfer->fInPartHeader =
true;
257 transfer->AdvanceRange();
258 if (transfer->fCurrentRange == transfer->GetNRanges()) {
259 transfer->fExtraMsg = std::string(
"received range too long");
265 transfer->fNBytesProcessed += nbytes;
271std::size_t ProcessRawData(
char *data, std::size_t nbytes, RTransferState *transfer)
273 std::size_t nbytesRemain = nbytes;
275 while ((nbytesRemain > 0) && (transfer->fCurrentRange < transfer->GetNRanges())) {
276 auto &range = transfer->GetCurrentRange();
278 std::size_t nbytesSkip = 0;
279 if (transfer->fNBytesProcessed < range.fOffset) {
281 nbytesSkip = std::min(
static_cast<std::uint64_t
>(nbytesRemain), range.fOffset - transfer->fNBytesProcessed);
284 const std::size_t nbytesCopy = std::min(nbytesRemain - nbytesSkip, range.fLength -
range.fNBytesRecv);
285 if (nbytesCopy > 0) {
290 range.fNBytesRecv += nbytesCopy;
292 transfer->AdvanceRange();
294 nbytesRemain -= nbytesSkip + nbytesCopy;
295 data += nbytesSkip + nbytesCopy;
296 transfer->fNBytesProcessed += nbytesSkip + nbytesCopy;
298 transfer->fNBytesProcessed += nbytesRemain;
305std::size_t CallbackData(
char *data, std::size_t
size, std::size_t nmemb,
void *userdata)
307 std::size_t nbytes =
size * nmemb;
311 RTransferState *transfer =
static_cast<RTransferState *
>(userdata);
322 if (transfer->fResponseCode == 0) {
324 auto rc = curl_easy_getinfo(transfer->fHandle, CURLINFO_RESPONSE_CODE, &transfer->fResponseCode);
327 if (transfer->IsPartial()) {
329#ifdef HAS_CURL_EASY_HEADER
330 curl_header *
h =
nullptr;
331 transfer->fHasMultipartContent =
332 curl_easy_header(transfer->fHandle,
"content-range", 0, CURLH_HEADER, -1, &
h) != CURLHE_OK;
334 transfer->fHasMultipartContent = !transfer->fHasContentRangeHeader;
336 if (transfer->fHasMultipartContent) {
337 transfer->fInPartHeader =
true;
340 transfer->fNBytesProcessed = transfer->GetCurrentRange().fOffset;
345 if (transfer->fResponseCode / 100 != kHttpResponseSuccessClass) {
350 if (transfer->fHasMultipartContent)
351 return ProcessMultipartData(data, nbytes, transfer);
353 return ProcessRawData(data, nbytes, transfer);
356#ifndef HAS_CURL_EASY_HEADER
358std::size_t CallbackHeader(
char *buffer, std::size_t
size, std::size_t nitems,
void *userdata)
360 std::size_t nbytes =
size * nitems;
364 RTransferState *transfer =
static_cast<RTransferState *
>(userdata);
366 std::string headerLine(buffer, nbytes);
367 std::transform(headerLine.begin(), headerLine.end(), headerLine.begin(), ::toupper);
368 if (headerLine.rfind(
"CONTENT-RANGE:", 0) == 0) {
369 transfer->fHasContentRangeHeader =
true;
376int CallbackDebug(CURL * , curl_infotype type,
char *data,
size_t size,
void * )
378 std::string prefix =
"(libcurl debug) ";
380 case CURLINFO_TEXT: prefix +=
"{info} ";
break;
381 case CURLINFO_HEADER_IN: prefix +=
"{header/recv} ";
break;
382 case CURLINFO_HEADER_OUT: prefix +=
"{header/sent} ";
break;
383 case CURLINFO_DATA_IN: prefix +=
"{data/recv} ";
break;
384 case CURLINFO_DATA_OUT: prefix +=
"{data/sent} ";
break;
385 case CURLINFO_SSL_DATA_IN: prefix +=
"{ssldata/recv} ";
break;
386 case CURLINFO_SSL_DATA_OUT: prefix +=
"{ssldata/sent} ";
break;
391 case CURLINFO_DATA_IN:
392 case CURLINFO_DATA_OUT:
393 case CURLINFO_SSL_DATA_IN:
394 case CURLINFO_SSL_DATA_OUT:
395 if (
size > kMaxDebugDataChars) {
396 Info(
"RCurlConnection",
"%s <snip>", prefix.c_str());
402 std::string msg(data,
size);
403 bool isPrintable =
true;
405 for (std::size_t i = 0; i < msg.length(); ++
i) {
406 if (msg[i] ==
'\0') {
410 if ((msg[i] <
' ' || msg[i] >
'~') && (msg[i] != 10 && msg[i] != 13 )) {
417 msg =
"<Non-plaintext sequence>";
419 Info(
"RCurlConnection",
"%s%s", prefix.c_str(), msg.c_str());
425std::vector<RHttpRange>
428 const auto N = order.size();
429 std::vector<RHttpRange>
result;
431 std::uint64_t rangeBegin = 0;
432 std::uint64_t rangeEnd = 0;
433 for (std::size_t i = 0; i <
N; ++
i) {
434 const auto &
r = ranges[order[i]];
439 if (
r.fOffset == rangeEnd) {
441 rangeEnd =
r.fOffset +
r.fLength;
447 result.emplace_back(RHttpRange{rangeBegin, rangeEnd - 1});
450 rangeBegin =
r.fOffset;
451 rangeEnd =
r.fOffset +
r.fLength;
454 result.emplace_back(RHttpRange{rangeBegin, rangeEnd - 1});
478std::vector<std::size_t>
481 const auto N = order.size();
482 std::vector<std::size_t> displacements(
N);
483 for (std::size_t i = 1; i <
N; ++
i) {
485 const auto &prevRange = ranges[order[i - 1]];
486 if (prevRange.fLength == 0)
489 const auto prevLastByte = prevRange.fOffset + prevRange.fLength - 1;
492 for (
auto j = i; j <
N; ++j) {
493 auto &thisRange = ranges[order[j]];
494 if (thisRange.fOffset > prevLastByte)
497 std::min(
static_cast<std::uint64_t
>(thisRange.fLength), prevLastByte - thisRange.fOffset + 1);
501 thisRange.fOffset += displacement;
502 thisRange.fDestination += displacement;
503 thisRange.fLength -= displacement;
504 displacements[j] += displacement;
507 return displacements;
515 const std::vector<std::size_t> &order,
bool copyBuffers)
517 const auto N = order.size();
518 for (std::size_t i = 1; i <
N; ++
i) {
519 if (displacements[i] == 0)
522 auto &thisRange = ranges[order[i]];
523 std::size_t j = i - 1;
526 const auto &prevRange = ranges[order[j]];
527 if ((prevRange.fLength == 0) || (prevRange.fOffset + prevRange.fLength < thisRange.fOffset)) {
531 const std::size_t nbytesReverse =
532 std::min(
static_cast<std::uint64_t
>(displacements[i]), thisRange.fOffset - prevRange.fOffset);
533 thisRange.fOffset -= nbytesReverse;
534 thisRange.fDestination -= nbytesReverse;
535 thisRange.fLength += nbytesReverse;
536 displacements[i] -= nbytesReverse;
540 std::size_t nbytesRecvFromPrev = 0;
541 if (prevRange.fOffset + prevRange.fNBytesRecv >= thisRange.fOffset) {
542 nbytesRecvFromPrev = std::min(prevRange.fOffset + prevRange.fNBytesRecv - thisRange.fOffset,
543 static_cast<std::uint64_t
>(nbytesReverse));
546 thisRange.fNBytesRecv += nbytesRecvFromPrev;
547 if (copyBuffers && (nbytesRecvFromPrev > 0)) {
548 memcpy(thisRange.fDestination, prevRange.fDestination + (thisRange.fOffset - prevRange.fOffset),
551 }
while (displacements[i]);
555std::string GetCurlErrorString(CURLcode code)
557 return std::string(curl_easy_strerror(code)) +
" (" + std::to_string(code) +
")";
560std::string GetCurlUrlErrorString(CURLUcode code)
562#ifdef HAS_CURL_URL_STRERROR
563 return std::string(curl_url_strerror(code)) +
" (" + std::to_string(code) +
")";
565 return std::string(
"libcurl too old for mapping error number to text") +
" (" + std::to_string(code) +
")";
569std::string GetUserAgentString()
574 auto curlVersionInfo = GetCurlVersionInfo();
576 return std::string(
"ROOT/v") +
ROOT_RELEASE +
" (" + std::string(
s.fOS) +
") curl/" + curlVersionInfo->version +
577 " " + curlVersionInfo->ssl_version;
584 return LIBCURL_VERSION_NUM;
589 EnsureCurlInitialized();
614 std::swap(
fHandle, other.fHandle);
624 other.fHandle =
nullptr;
625 fCredentials = std::move(other.fCredentials);
633 fErrorBuffer = std::make_unique<char[]>(CURL_ERROR_SIZE);
643 rc = curl_easy_setopt(
fHandle, CURLOPT_VERBOSE, 1);
645 rc = curl_easy_setopt(
fHandle, CURLOPT_DEBUGFUNCTION, CallbackDebug);
648 rc = curl_easy_setopt(
fHandle, CURLOPT_VERBOSE, 0);
652 static const std::string kUserAgent = GetUserAgentString();
653 rc = curl_easy_setopt(
fHandle, CURLOPT_USERAGENT, kUserAgent.c_str());
656 rc = curl_easy_setopt(
fHandle, CURLOPT_FOLLOWLOCATION, 1);
659 rc = curl_easy_setopt(
fHandle, CURLOPT_WRITEFUNCTION, CallbackData);
665 CURLU *cu = curl_url();
667 auto rc = curl_url_set(cu, CURLUPART_URL, url.c_str(), CURLU_URLENCODE);
668 if (rc != CURLUE_OK) {
669 curl_url_cleanup(cu);
670 return R__FAIL(std::string(
"invalid URL: ") + std::string(url) +
" [" + GetCurlUrlErrorString(rc) +
"]");
673 char *escaped_url =
nullptr;
674 rc = curl_url_get(cu, CURLUPART_URL, &escaped_url, CURLU_NO_DEFAULT_PORT);
675 curl_url_cleanup(cu);
676 if (rc != CURLUE_OK) {
677 return R__FAIL(std::string(
"URL escape error: ") + std::string(url) +
" [" + GetCurlUrlErrorString(rc) +
"]");
682 auto rcOpt = curl_easy_setopt(
fHandle, CURLOPT_URL, escaped_url);
683 curl_free(escaped_url);
684 if (rcOpt != CURLE_OK) {
685 return R__FAIL(
"cannot set URL: " + GetCurlErrorString(rcOpt));
693 auto rc = curl_easy_perform(
fHandle);
696#ifdef CURLE_TOO_LARGE
697 if (rc == CURLE_TOO_LARGE) {
699 if (rc == CURLE_OUT_OF_MEMORY) {
703 }
else if (rc != CURLE_OK) {
705 status.
fStatusMsg +=
" [" + GetCurlErrorString(rc) +
"]";
708 rc = curl_easy_getinfo(
fHandle, CURLINFO_OS_ERRNO, &osErrNo);
710 status.
fStatusMsg +=
" (OS errno: " + std::to_string(osErrNo) +
")";
712 long responseCode = 0;
713 rc = curl_easy_getinfo(
fHandle, CURLINFO_RESPONSE_CODE, &responseCode);
715 if ((responseCode / 100 == kHttpResponseSuccessClass) || (responseCode == kHttpResponseRangeNotSatisfiable)) {
718 }
else if (responseCode == kHttpResponseNotFound) {
720 }
else if (responseCode == kHttpResponseBadRequest) {
722 }
else if (responseCode == kHttpResponseForbidden) {
734 auto rc = curl_easy_setopt(
fHandle, CURLOPT_NOBODY, 1);
736 rc = curl_easy_setopt(
fHandle, CURLOPT_RANGE,
NULL);
739#ifndef HAS_CURL_EASY_HEADER
740 rc = curl_easy_setopt(
fHandle, CURLOPT_HEADERFUNCTION,
NULL);
742 rc = curl_easy_setopt(
fHandle, CURLOPT_HEADERDATA,
NULL);
749 curl_off_t length = -1;
750 rc = curl_easy_getinfo(
fHandle, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &length);
751 if (rc == CURLE_OK && length >= 0)
767 std::vector<std::size_t> order(
N);
768 std::iota(order.begin(), order.end(), 0);
769 std::sort(order.begin(), order.end(), [ranges](std::size_t
a, std::size_t
b) { return ranges[a] < ranges[b]; });
772 auto displacements = CreateAndApplyDisplacements(ranges, order);
775 const auto requestRanges = CreateRequestRanges(ranges, order);
776 if (requestRanges.empty()) {
781 auto rc = curl_easy_setopt(
fHandle, CURLOPT_HTTPGET, 1);
784 RTransferState transfer(ranges, order,
fHandle);
785 rc = curl_easy_setopt(
fHandle, CURLOPT_WRITEDATA, &transfer);
788#ifndef HAS_CURL_EASY_HEADER
789 rc = curl_easy_setopt(
fHandle, CURLOPT_HEADERFUNCTION, CallbackHeader);
791 rc = curl_easy_setopt(
fHandle, CURLOPT_HEADERDATA, &transfer);
808 for (std::size_t
b = 0;
b < requestRanges.size();
b += batchSize) {
809 const std::size_t nRanges = std::min(batchSize, requestRanges.size() -
b);
810 std::string rangeHeader = requestRanges[
b].ToString();
811 for (std::size_t i = 1; i < nRanges; ++i) {
812 rangeHeader +=
"," + requestRanges[
b + i].ToString();
814 rc = curl_easy_setopt(
fHandle, CURLOPT_RANGE, rangeHeader.c_str());
818 const std::uint64_t lastByteRequested = requestRanges[
b - 1].fLastByte;
822 for (transfer.fCurrentRange = 0; transfer.fCurrentRange <
N; transfer.fCurrentRange++) {
823 if (transfer.GetCurrentRange().fOffset > lastByteRequested)
828 transfer.fResponseCode = 0;
843 if (!transfer.fExtraMsg.empty()) {
844 status.
fStatusMsg +=
"; extra information: " + transfer.fExtraMsg;
847 ReverseDisplacements(displacements, ranges, order,
static_cast<bool>(status));
856 const std::string region = credentials.
fRegion.empty() ?
"default" : credentials.
fRegion;
857 const std::string sigArg = std::string(
"aws:amz:") + region +
":s3";
858 auto rc = curl_easy_setopt(
fHandle, CURLOPT_AWS_SIGV4, sigArg.c_str());
859 if (rc != CURLE_OK) {
860 throw RException(
R__FAIL(std::string(
"cannot set CURLOPT_AWS_SIGV4: ") + GetCurlErrorString(rc)));
864 rc = curl_easy_setopt(
fHandle, CURLOPT_USERPWD, userPwd.c_str());
865 if (rc != CURLE_OK) {
866 throw RException(
R__FAIL(std::string(
"cannot set CURLOPT_USERPWD: ") + GetCurlErrorString(rc)));
882 rc = curl_easy_setopt(
fHandle, CURLOPT_AWS_SIGV4,
NULL);
884 rc = curl_easy_setopt(
fHandle, CURLOPT_USERPWD,
NULL);
887 default:
R__ASSERT(
false &&
"internal error: unknown credentials type");
904 const auto accessKey = std::getenv(
"S3_ACCESS_KEY");
905 if (accessKey && (accessKey[0] !=
'\0')) {
906 const auto secretKey = std::getenv(
"S3_SECRET_KEY");
907 if (!secretKey || (secretKey[0] ==
'\0')) {
908 R__LOG_WARNING(HTTPClientLog()) <<
"found S3_ACCESS_KEY environment variable but S3_SECRET_KEY unset. "
909 "Ignoring S3 credentials.";
912 const auto region = std::getenv(
"S3_REGION");
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
static const char * ToString(ENameCycleError err)
#define R__LOG_WARNING(...)
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
Encapsulates a curl easy handle and provides an interface to send HTTP HEAD and (multi-)range queries...
void SetCredentials(const RS3Credentials &credentials)
void * fHandle
the CURL easy handle corresponding to this connection
void Perform(RStatus &status)
std::string fEscapedUrl
The URL provided in the constructor escaped according to standard rules.
std::unique_ptr< char[]> fErrorBuffer
For use by libcurl.
RStatus SendRangesReq(std::size_t N, RUserRange *ranges)
Reads the given ranges from the remote resource.
static constexpr std::uint64_t kUnknownSize
Returned by SendHeadReq() if the HTTP response contains no content-length header.
std::size_t fMaxNRangesPerReqest
If set to zero, automatically adjust: try with all given ranges and as long as the number of ranges i...
RCurlConnection(const std::string &url)
RStatus SendHeadReq(std::uint64_t &remoteSize)
Checks if the resource exists and if it does, return the value of the content-length header as size.
static int GetCurlVersion()
Used for testing.
void SetCredentialsFromEnvironment()
Sets the credentials from process environment variables.
RResult< void > SetUrl(const std::string &url)
std::unique_ptr< RHTTPCredentials > fCredentials
EHTTPCredentialsType GetCredentialsType() const
Base class for all ROOT issued exceptions.
A log configuration for a channel, e.g.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Double_t result(Double_t *x, Double_t *par)
BVH_ALWAYS_INLINE T length(const Vec< T, N > &v)
constexpr auto empty(const C &c) -> decltype(c.empty())
Return value for both HEAD and GET requests.
@ kTooManyRanges
should not get to the user; number of request ranges is automatically reduced as needed
Caller-provided byte-range of the remote resource together with a pointer to a buffer.