[llvm] 7917b3c - [Debuginfod] Don't depend on Content-Length.

Daniel Thornburgh via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 21 10:27:51 PDT 2022


Author: Daniel Thornburgh
Date: 2022-03-21T17:27:45Z
New Revision: 7917b3c6957cdf9f4cb4f4908971184d7bbe38e7

URL: https://github.com/llvm/llvm-project/commit/7917b3c6957cdf9f4cb4f4908971184d7bbe38e7
DIFF: https://github.com/llvm/llvm-project/commit/7917b3c6957cdf9f4cb4f4908971184d7bbe38e7.diff

LOG: [Debuginfod] Don't depend on Content-Length.

The present implementation of debuginfod lookups requires the
Content-Length field to be populated in the HTTP server response.
Unfortunately, Content-Length is optional, and there are some real
scenarios where it's missing. (For example, a Google Cloud Storage
server doing on-the-fly gunzipping.)

This changes the debuginfod response handler to directly stream the
output to the cache file as it is received. In addition to allowing
lookups to proceed without a Content-Lenght, it seems somewhat more
straightforward to implement, and it allows the disk I/O to be
interleaved with the network I/O.

Reviewed By: noajshu

Differential Revision: https://reviews.llvm.org/D121720

Added: 
    

Modified: 
    llvm/include/llvm/Debuginfod/HTTPClient.h
    llvm/lib/Debuginfod/Debuginfod.cpp
    llvm/lib/Debuginfod/HTTPClient.cpp
    llvm/unittests/Debuginfod/CMakeLists.txt

Removed: 
    llvm/unittests/Debuginfod/HTTPClientTests.cpp


################################################################################
diff  --git a/llvm/include/llvm/Debuginfod/HTTPClient.h b/llvm/include/llvm/Debuginfod/HTTPClient.h
index ca3b76ca9f3f4..6c94961032e75 100644
--- a/llvm/include/llvm/Debuginfod/HTTPClient.h
+++ b/llvm/include/llvm/Debuginfod/HTTPClient.h
@@ -7,9 +7,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file contains the declarations of the HTTPClient, HTTPMethod,
-/// HTTPResponseHandler, and BufferedHTTPResponseHandler classes, as well as
-/// the HTTPResponseBuffer and HTTPRequest structs.
+/// This file contains the declarations of the HTTPClient library for issuing
+/// HTTP requests and handling the responses.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -40,43 +39,13 @@ bool operator==(const HTTPRequest &A, const HTTPRequest &B);
 /// of its methods.
 class HTTPResponseHandler {
 public:
-  /// Processes one line of HTTP response headers.
-  virtual Error handleHeaderLine(StringRef HeaderLine) = 0;
-
   /// Processes an additional chunk of bytes of the HTTP response body.
   virtual Error handleBodyChunk(StringRef BodyChunk) = 0;
 
-  /// Processes the HTTP response status code.
-  virtual Error handleStatusCode(unsigned Code) = 0;
-
 protected:
   ~HTTPResponseHandler();
 };
 
-/// An HTTP response status code bundled with a buffer to store the body.
-struct HTTPResponseBuffer {
-  unsigned Code = 0;
-  std::unique_ptr<WritableMemoryBuffer> Body;
-};
-
-/// A simple handler which writes returned data to an HTTPResponseBuffer.
-/// Ignores all headers except the Content-Length, which it uses to
-/// allocate an appropriately-sized Body buffer.
-class BufferedHTTPResponseHandler final : public HTTPResponseHandler {
-  size_t Offset = 0;
-
-public:
-  /// Stores the data received from the HTTP server.
-  HTTPResponseBuffer ResponseBuffer;
-
-  /// These callbacks store the body and status code in an HTTPResponseBuffer
-  /// allocated based on Content-Length. The Content-Length header must be
-  /// handled by handleHeaderLine before any calls to handleBodyChunk.
-  Error handleHeaderLine(StringRef HeaderLine) override;
-  Error handleBodyChunk(StringRef BodyChunk) override;
-  Error handleStatusCode(unsigned Code) override;
-};
-
 /// A reusable client that can perform HTTPRequests through a network socket.
 class HTTPClient {
 #ifdef LLVM_ENABLE_CURL
@@ -107,13 +76,8 @@ class HTTPClient {
   /// Handler method.
   Error perform(const HTTPRequest &Request, HTTPResponseHandler &Handler);
 
-  /// Performs the Request with the default BufferedHTTPResponseHandler, and
-  /// returns its HTTPResponseBuffer or an Error.
-  Expected<HTTPResponseBuffer> perform(const HTTPRequest &Request);
-
-  /// Performs an HTTPRequest with the default configuration to make a GET
-  /// request to the given Url. Returns an HTTPResponseBuffer or an Error.
-  Expected<HTTPResponseBuffer> get(StringRef Url);
+  /// Returns the last received response code or zero if none.
+  unsigned responseCode();
 };
 
 } // end namespace llvm

diff  --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp
index 27614572766d4..a18cfdd41037a 100644
--- a/llvm/lib/Debuginfod/Debuginfod.cpp
+++ b/llvm/lib/Debuginfod/Debuginfod.cpp
@@ -115,6 +115,40 @@ Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
                                      getDefaultDebuginfodTimeout());
 }
 
+namespace {
+
+/// A simple handler which streams the returned data to a cache file. The cache
+/// file is only created if a 200 OK status is observed.
+class StreamedHTTPResponseHandler : public HTTPResponseHandler {
+  using CreateStreamFn =
+      std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
+  CreateStreamFn CreateStream;
+  HTTPClient &Client;
+  std::unique_ptr<CachedFileStream> FileStream;
+
+public:
+  StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
+      : CreateStream(CreateStream), Client(Client) {}
+
+  Error handleBodyChunk(StringRef BodyChunk) override;
+};
+
+} // namespace
+
+Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
+  if (!FileStream) {
+    if (Client.responseCode() != 200)
+      return Error::success();
+    Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
+        CreateStream();
+    if (!FileStreamOrError)
+      return FileStreamOrError.takeError();
+    FileStream = std::move(*FileStreamOrError);
+  }
+  *FileStream->OS << BodyChunk;
+  return Error::success();
+}
+
 Expected<std::string> getCachedOrDownloadArtifact(
     StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
     ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
@@ -155,28 +189,18 @@ Expected<std::string> getCachedOrDownloadArtifact(
     SmallString<64> ArtifactUrl;
     sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
 
-    Expected<HTTPResponseBuffer> ResponseOrErr = Client.get(ArtifactUrl);
-    if (!ResponseOrErr)
-      return ResponseOrErr.takeError();
+    // Perform the HTTP request and if successful, write the response body to
+    // the cache.
+    StreamedHTTPResponseHandler Handler([&]() { return CacheAddStream(Task); },
+                                        Client);
+    HTTPRequest Request(ArtifactUrl);
+    Error Err = Client.perform(Request, Handler);
+    if (Err)
+      return Err;
 
-    HTTPResponseBuffer &Response = *ResponseOrErr;
-    if (Response.Code != 200)
+    if (Client.responseCode() != 200)
       continue;
 
-    // We have retrieved the artifact from this server, and now add it to the
-    // file cache.
-    Expected<std::unique_ptr<CachedFileStream>> FileStreamOrErr =
-        CacheAddStream(Task);
-    if (!FileStreamOrErr)
-      return FileStreamOrErr.takeError();
-    std::unique_ptr<CachedFileStream> &FileStream = *FileStreamOrErr;
-    if (!Response.Body)
-      return createStringError(
-          errc::io_error, "Unallocated MemoryBuffer in HTTPResponseBuffer.");
-
-    *FileStream->OS << StringRef(Response.Body->getBufferStart(),
-                                 Response.Body->getBufferSize());
-
     // Return the path to the artifact on disk.
     return std::string(AbsCachedArtifactPath);
   }

diff  --git a/llvm/lib/Debuginfod/HTTPClient.cpp b/llvm/lib/Debuginfod/HTTPClient.cpp
index cba342c9b3439..3376eaa7cd0d2 100644
--- a/llvm/lib/Debuginfod/HTTPClient.cpp
+++ b/llvm/lib/Debuginfod/HTTPClient.cpp
@@ -7,9 +7,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-///
-/// This file defines the methods of the HTTPRequest, HTTPClient, and
-/// BufferedHTTPResponseHandler classes.
+/// This file defines the implementation of the HTTPClient library for issuing
+/// HTTP requests and handling the responses.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -34,44 +33,6 @@ bool operator==(const HTTPRequest &A, const HTTPRequest &B) {
 
 HTTPResponseHandler::~HTTPResponseHandler() = default;
 
-static inline bool parseContentLengthHeader(StringRef LineRef,
-                                            size_t &ContentLength) {
-  // Content-Length is a mandatory header, and the only one we handle.
-  return LineRef.consume_front("Content-Length: ") &&
-         to_integer(LineRef.trim(), ContentLength, 10);
-}
-
-Error BufferedHTTPResponseHandler::handleHeaderLine(StringRef HeaderLine) {
-  if (ResponseBuffer.Body)
-    return Error::success();
-
-  size_t ContentLength;
-  if (parseContentLengthHeader(HeaderLine, ContentLength))
-    ResponseBuffer.Body =
-        WritableMemoryBuffer::getNewUninitMemBuffer(ContentLength);
-
-  return Error::success();
-}
-
-Error BufferedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
-  if (!ResponseBuffer.Body)
-    return createStringError(errc::io_error,
-                             "Unallocated response buffer. HTTP Body data "
-                             "received before Content-Length header.");
-  if (Offset + BodyChunk.size() > ResponseBuffer.Body->getBufferSize())
-    return createStringError(errc::io_error,
-                             "Content size exceeds buffer size.");
-  memcpy(ResponseBuffer.Body->getBufferStart() + Offset, BodyChunk.data(),
-         BodyChunk.size());
-  Offset += BodyChunk.size();
-  return Error::success();
-}
-
-Error BufferedHTTPResponseHandler::handleStatusCode(unsigned Code) {
-  ResponseBuffer.Code = Code;
-  return Error::success();
-}
-
 bool HTTPClient::IsInitialized = false;
 
 class HTTPClientCleanup {
@@ -80,18 +41,6 @@ class HTTPClientCleanup {
 };
 static const HTTPClientCleanup Cleanup;
 
-Expected<HTTPResponseBuffer> HTTPClient::perform(const HTTPRequest &Request) {
-  BufferedHTTPResponseHandler Handler;
-  if (Error Err = perform(Request, Handler))
-    return std::move(Err);
-  return std::move(Handler.ResponseBuffer);
-}
-
-Expected<HTTPResponseBuffer> HTTPClient::get(StringRef Url) {
-  HTTPRequest Request(Url);
-  return perform(Request);
-}
-
 #ifdef LLVM_ENABLE_CURL
 
 bool HTTPClient::isAvailable() { return true; }
@@ -128,18 +77,6 @@ struct CurlHTTPRequest {
   llvm::Error ErrorState = Error::success();
 };
 
-static size_t curlHeaderFunction(char *Contents, size_t Size, size_t NMemb,
-                                 CurlHTTPRequest *CurlRequest) {
-  assert(Size == 1 && "The Size passed by libCURL to CURLOPT_HEADERFUNCTION "
-                      "should always be 1.");
-  if (Error Err =
-          CurlRequest->Handler.handleHeaderLine(StringRef(Contents, NMemb))) {
-    CurlRequest->storeError(std::move(Err));
-    return 0;
-  }
-  return NMemb;
-}
-
 static size_t curlWriteFunction(char *Contents, size_t Size, size_t NMemb,
                                 CurlHTTPRequest *CurlRequest) {
   Size *= NMemb;
@@ -160,7 +97,6 @@ HTTPClient::HTTPClient() {
   assert(Curl && "Curl could not be initialized");
   // Set the callback hooks.
   curl_easy_setopt(Curl, CURLOPT_WRITEFUNCTION, curlWriteFunction);
-  curl_easy_setopt(Curl, CURLOPT_HEADERFUNCTION, curlHeaderFunction);
 }
 
 HTTPClient::~HTTPClient() { curl_easy_cleanup(Curl); }
@@ -177,22 +113,19 @@ Error HTTPClient::perform(const HTTPRequest &Request,
 
   CurlHTTPRequest CurlRequest(Handler);
   curl_easy_setopt(Curl, CURLOPT_WRITEDATA, &CurlRequest);
-  curl_easy_setopt(Curl, CURLOPT_HEADERDATA, &CurlRequest);
   CURLcode CurlRes = curl_easy_perform(Curl);
   if (CurlRes != CURLE_OK)
     return joinErrors(std::move(CurlRequest.ErrorState),
                       createStringError(errc::io_error,
                                         "curl_easy_perform() failed: %s\n",
                                         curl_easy_strerror(CurlRes)));
-  if (CurlRequest.ErrorState)
-    return std::move(CurlRequest.ErrorState);
+  return std::move(CurlRequest.ErrorState);
+}
 
-  unsigned Code;
+unsigned HTTPClient::responseCode() {
+  long Code = 0;
   curl_easy_getinfo(Curl, CURLINFO_RESPONSE_CODE, &Code);
-  if (Error Err = Handler.handleStatusCode(Code))
-    return joinErrors(std::move(CurlRequest.ErrorState), std::move(Err));
-
-  return std::move(CurlRequest.ErrorState);
+  return Code;
 }
 
 #else
@@ -214,4 +147,8 @@ Error HTTPClient::perform(const HTTPRequest &Request,
   llvm_unreachable("No HTTP Client implementation available.");
 }
 
+unsigned HTTPClient::responseCode() {
+  llvm_unreachable("No HTTP Client implementation available.");
+}
+
 #endif

diff  --git a/llvm/unittests/Debuginfod/CMakeLists.txt b/llvm/unittests/Debuginfod/CMakeLists.txt
index d2109fe8af175..cb800872e10e8 100644
--- a/llvm/unittests/Debuginfod/CMakeLists.txt
+++ b/llvm/unittests/Debuginfod/CMakeLists.txt
@@ -1,5 +1,4 @@
 add_llvm_unittest(DebuginfodTests
-  HTTPClientTests.cpp
   DebuginfodTests.cpp
   )
 

diff  --git a/llvm/unittests/Debuginfod/HTTPClientTests.cpp b/llvm/unittests/Debuginfod/HTTPClientTests.cpp
deleted file mode 100644
index 7f7d201064a8a..0000000000000
--- a/llvm/unittests/Debuginfod/HTTPClientTests.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===-- llvm/unittest/Debuginfod/HTTPClientTests.cpp - unit tests ---------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Debuginfod/HTTPClient.h"
-#include "llvm/Support/Errc.h"
-#include "llvm/Testing/Support/Error.h"
-#include "gtest/gtest.h"
-
-using namespace llvm;
-
-TEST(BufferedHTTPResponseHandler, Lifecycle) {
-  BufferedHTTPResponseHandler Handler;
-  EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length: 36\r\n"),
-                    Succeeded());
-
-  EXPECT_THAT_ERROR(Handler.handleBodyChunk("body:"), Succeeded());
-  EXPECT_THAT_ERROR(Handler.handleBodyChunk("this puts the total at 36 chars"),
-                    Succeeded());
-  EXPECT_EQ(Handler.ResponseBuffer.Body->MemoryBuffer::getBuffer(),
-            "body:this puts the total at 36 chars");
-
-  // Additional content should be rejected by the handler.
-  EXPECT_THAT_ERROR(
-      Handler.handleBodyChunk("extra content past the content-length"),
-      Failed<llvm::StringError>());
-
-  // Test response code is set.
-  EXPECT_THAT_ERROR(Handler.handleStatusCode(200u), Succeeded());
-  EXPECT_EQ(Handler.ResponseBuffer.Code, 200u);
-  EXPECT_THAT_ERROR(Handler.handleStatusCode(400u), Succeeded());
-  EXPECT_EQ(Handler.ResponseBuffer.Code, 400u);
-}
-
-TEST(BufferedHTTPResponseHandler, NoContentLengthLifecycle) {
-  BufferedHTTPResponseHandler Handler;
-  EXPECT_EQ(Handler.ResponseBuffer.Code, 0u);
-  EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
-
-  // A body chunk passed before the content-length header is an error.
-  EXPECT_THAT_ERROR(Handler.handleBodyChunk("body"),
-                    Failed<llvm::StringError>());
-  EXPECT_THAT_ERROR(Handler.handleHeaderLine("a header line"), Succeeded());
-  EXPECT_THAT_ERROR(Handler.handleBodyChunk("body"),
-                    Failed<llvm::StringError>());
-}
-
-TEST(BufferedHTTPResponseHandler, ZeroContentLength) {
-  BufferedHTTPResponseHandler Handler;
-  EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length: 0"), Succeeded());
-  EXPECT_NE(Handler.ResponseBuffer.Body, nullptr);
-  EXPECT_EQ(Handler.ResponseBuffer.Body->getBufferSize(), 0u);
-
-  // All content should be rejected by the handler.
-  EXPECT_THAT_ERROR(Handler.handleBodyChunk("non-empty body content"),
-                    Failed<llvm::StringError>());
-}
-
-TEST(BufferedHTTPResponseHandler, MalformedContentLength) {
-  // Check that several invalid content lengths are ignored.
-  BufferedHTTPResponseHandler Handler;
-  EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
-  EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length: fff"),
-                    Succeeded());
-  EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
-
-  EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length:    "),
-                    Succeeded());
-  EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
-
-  using namespace std::string_literals;
-  EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length: \0\0\0"s),
-                    Succeeded());
-  EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
-
-  EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length: -11"),
-                    Succeeded());
-  EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
-
-  // All content should be rejected by the handler because no valid
-  // Content-Length header has been received.
-  EXPECT_THAT_ERROR(Handler.handleBodyChunk("non-empty body content"),
-                    Failed<llvm::StringError>());
-}
-
-#ifdef LLVM_ENABLE_CURL
-
-TEST(HTTPClient, isAvailable) { EXPECT_TRUE(HTTPClient::isAvailable()); }
-
-#endif


        


More information about the llvm-commits mailing list