[llvm] babef90 - [llvm] [Debuginfod] DebuginfodCollection and DebuginfodServer for tracking local debuginfo.

Noah Shutty via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 6 13:02:18 PDT 2022


Author: Noah Shutty
Date: 2022-07-06T20:02:14Z
New Revision: babef908cc135c05625d7b36e38b55115ec9dc1e

URL: https://github.com/llvm/llvm-project/commit/babef908cc135c05625d7b36e38b55115ec9dc1e
DIFF: https://github.com/llvm/llvm-project/commit/babef908cc135c05625d7b36e38b55115ec9dc1e.diff

LOG: [llvm] [Debuginfod] DebuginfodCollection and DebuginfodServer for tracking local debuginfo.

This library implements the class `DebuginfodCollection`, which scans a set of directories for binaries, classifying them according to whether they contain debuginfo. This also provides the `DebuginfodServer`, an `HTTPServer` which serves debuginfod's `/debuginfo` and `/executable` endpoints. This is intended as the final new supporting library required for `llvm-debuginfod`.

As implemented here, `DebuginfodCollection` only finds ELF binaries and DWARF debuginfo. All other files are ignored. However, the class interface is format-agnostic. Generalizing to support other platforms will require refactoring of LLVM's object parsing libraries to eliminate use of `report_fatal_error` ([[ https://github.com/llvm/llvm-project/blob/main/llvm/lib/Object/WasmObjectFile.cpp#L74 | e.g. when reading WASM files ]]), so that the debuginfod daemon does not crash when it encounters a malformed file on the disk.

The `DebuginfodCollection` is tested by end-to-end tests of the debuginfod server (D114846).

Reviewed By: mysterymath

Differential Revision: https://reviews.llvm.org/D114845

Added: 
    

Modified: 
    llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
    llvm/include/llvm/Debuginfod/Debuginfod.h
    llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
    llvm/lib/Debuginfod/CMakeLists.txt
    llvm/lib/Debuginfod/Debuginfod.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 00c4bf0a615f1..f233a183912b2 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -243,6 +243,8 @@ class CachedBinary : public ilist_node<CachedBinary> {
   std::function<void()> Evictor;
 };
 
+Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj);
+
 } // end namespace symbolize
 } // end namespace llvm
 

diff  --git a/llvm/include/llvm/Debuginfod/Debuginfod.h b/llvm/include/llvm/Debuginfod/Debuginfod.h
index 064cfa75b1a1b..efa460466fc53 100644
--- a/llvm/include/llvm/Debuginfod/Debuginfod.h
+++ b/llvm/include/llvm/Debuginfod/Debuginfod.h
@@ -7,23 +7,31 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file contains the declarations of getCachedOrDownloadArtifact and
-/// several convenience functions for specific artifact types:
-/// getCachedOrDownloadSource, getCachedOrDownloadExecutable, and
-/// getCachedOrDownloadDebuginfo. This file also declares
-/// getDefaultDebuginfodUrls and getDefaultDebuginfodCacheDirectory.
-///
+/// This file contains several declarations for the debuginfod client and
+/// server. The client functions are getDefaultDebuginfodUrls,
+/// getCachedOrDownloadArtifact, and several convenience functions for specific
+/// artifact types: getCachedOrDownloadSource, getCachedOrDownloadExecutable,
+/// and getCachedOrDownloadDebuginfo. For the server, this file declares the
+/// DebuginfodLogEntry and DebuginfodServer structs, as well as the
+/// DebuginfodLog and DebuginfodCollection classes.
 ///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_DEBUGINFOD_DEBUGINFOD_H
 #define LLVM_DEBUGINFOD_DEBUGINFOD_H
 
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Debuginfod/HTTPServer.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/RWMutex.h"
+#include "llvm/Support/Timer.h"
 
 #include <chrono>
+#include <queue>
 
 namespace llvm {
 
@@ -68,6 +76,68 @@ Expected<std::string> getCachedOrDownloadArtifact(
     StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
     ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout);
 
+class ThreadPool;
+
+/// A single message recorded in a DebuginfodLog.
+struct DebuginfodLogEntry {
+  /// Text of the log message.
+  std::string Message;
+  DebuginfodLogEntry() = default;
+  /// Builds an entry whose Message is the string form of \p Message.
+  DebuginfodLogEntry(const Twine &Message);
+};
+
+/// A mutex-protected FIFO queue of log entries: push() enqueues an entry and
+/// pop() blocks until one is available.
+class DebuginfodLog {
+  // Guards LogEntryQueue.
+  std::mutex QueueMutex;
+  // Signalled by push() to wake a thread waiting in pop().
+  std::condition_variable QueueCondition;
+  // Pending entries, oldest first.
+  std::queue<DebuginfodLogEntry> LogEntryQueue;
+
+public:
+  // Adds a log entry to end of the queue.
+  void push(DebuginfodLogEntry Entry);
+  // Builds a log entry from Message and adds it to the end of the queue.
+  void push(const Twine &Message);
+  // Blocks until there are log entries in the queue, then pops and returns the
+  // first one.
+  DebuginfodLogEntry pop();
+};
+
+/// Tracks a collection of debuginfod artifacts on the local filesystem.
+///
+/// The collection is populated by scanning a fixed list of directories and is
+/// indexed by build ID. Binaries with DWARF info sections and binaries without
+/// them are kept in separate maps.
+class DebuginfodCollection {
+  // Directories scanned by update().
+  SmallVector<std::string, 1> Paths;
+  // Guards Binaries.
+  sys::RWMutex BinariesMutex;
+  // Maps a build-ID string to the path of a binary that has no DWARF info
+  // sections.
+  StringMap<std::string> Binaries;
+  // Guards DebugBinaries.
+  sys::RWMutex DebugBinariesMutex;
+  // Maps a build-ID string to the path of a binary that has DWARF info
+  // sections.
+  StringMap<std::string> DebugBinaries;
+  // Recursively scans Path and records every ELF object that has a build ID in
+  // Binaries or DebugBinaries.
+  Error findBinaries(StringRef Path);
+  // Look up a build ID in DebugBinaries / Binaries respectively; the result is
+  // None if the build ID is not present.
+  Expected<Optional<std::string>> getDebugBinaryPath(BuildIDRef);
+  Expected<Optional<std::string>> getBinaryPath(BuildIDRef);
+  // If the collection has not been updated since MinInterval, call update() and
+  // return true. Otherwise return false. If update returns an error, return the
+  // error.
+  Expected<bool> updateIfStale();
+  // Sink for progress and request messages.
+  DebuginfodLog &Log;
+  // Thread pool on which findBinaries() runs its scanning tasks.
+  ThreadPool &Pool;
+  // Started at the end of each successful update(); read by updateIfStale() to
+  // decide whether the collection is stale.
+  Timer UpdateTimer;
+  // Serializes calls to update().
+  sys::Mutex UpdateMutex;
+
+  // Minimum update interval, in seconds, for on-demand updates triggered when a
+  // build-id is not found.
+  double MinInterval;
+
+public:
+  // Paths lists the directories to scan; MinInterval is in seconds.
+  DebuginfodCollection(ArrayRef<StringRef> Paths, DebuginfodLog &Log,
+                       ThreadPool &Pool, double MinInterval);
+  // Rescans every directory in Paths, stopping at and returning the first
+  // error.
+  Error update();
+  // Calls update() repeatedly, sleeping for Interval after each successful
+  // pass. Returns only if update() fails.
+  Error updateForever(std::chrono::milliseconds Interval);
+  // Return the path of the debuginfo / executable with the given build ID,
+  // consulting the local collection first and then the federated debuginfod
+  // servers.
+  Expected<std::string> findDebugBinaryPath(BuildIDRef);
+  Expected<std::string> findBinaryPath(BuildIDRef);
+};
+
+/// Bundles an HTTPServer with the log and collection it serves from. The
+/// constructor registers the `/buildid/<id>/debuginfo` and
+/// `/buildid/<id>/executable` GET handlers on Server.
+struct DebuginfodServer {
+  /// HTTP server on which the debuginfod endpoints are registered.
+  HTTPServer Server;
+  /// Receives one message per handled request.
+  DebuginfodLog &Log;
+  /// Collection queried to resolve build IDs to file paths.
+  DebuginfodCollection &Collection;
+  DebuginfodServer(DebuginfodLog &Log, DebuginfodCollection &Collection);
+};
+
 } // end namespace llvm
 
 #endif

diff  --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index d2ff8aa7c995f..c239d4c260ec9 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -327,6 +327,8 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFFile<ELFT> &Obj) {
   return {};
 }
 
+} // end anonymous namespace
+
 Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) {
   Optional<ArrayRef<uint8_t>> BuildID;
   if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Obj))
@@ -342,8 +344,6 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) {
   return BuildID;
 }
 
-} // end anonymous namespace
-
 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
                                            const MachOObjectFile *MachExeObj,
                                            const std::string &ArchName) {

diff  --git a/llvm/lib/Debuginfod/CMakeLists.txt b/llvm/lib/Debuginfod/CMakeLists.txt
index 06f7441280a91..f5ea9e99622a0 100644
--- a/llvm/lib/Debuginfod/CMakeLists.txt
+++ b/llvm/lib/Debuginfod/CMakeLists.txt
@@ -25,4 +25,5 @@ add_llvm_library(LLVMDebuginfod
   LINK_COMPONENTS
   Support
   Symbolize
+  DebugInfoDWARF
   )

diff  --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp
index 7b1c36fdbe09c..bd54d698eb269 100644
--- a/llvm/lib/Debuginfod/Debuginfod.cpp
+++ b/llvm/lib/Debuginfod/Debuginfod.cpp
@@ -8,25 +8,39 @@
 ///
 /// \file
 ///
-/// This file defines the fetchInfo function, which retrieves
-/// any of the three supported artifact types: (executable, debuginfo, source
-/// file) associated with a build-id from debuginfod servers. If a source file
-/// is to be fetched, its absolute path must be specified in the Description
-/// argument to fetchInfo.
+/// This file contains several definitions for the debuginfod client and server.
+/// For the client, this file defines the fetchInfo function. For the server,
+/// this file defines the DebuginfodLogEntry and DebuginfodServer structs, as
+/// well as the DebuginfodLog and DebuginfodCollection classes. The fetchInfo
+/// function retrieves any of the three supported artifact types: (executable,
+/// debuginfo, source file) associated with a build-id from debuginfod servers.
+/// If a source file is to be fetched, its absolute path must be specified in
+/// the Description argument to fetchInfo. The DebuginfodLogEntry,
+/// DebuginfodLog, and DebuginfodCollection are used by the DebuginfodServer to
+/// scan the local filesystem for binaries and serve the debuginfod protocol.
 ///
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Debuginfod/Debuginfod.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
 #include "llvm/Debuginfod/HTTPClient.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/CachePruning.h"
 #include "llvm/Support/Caching.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/xxhash.h"
 
+#include <atomic>
+
 namespace llvm {
 static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); }
 
@@ -46,6 +60,8 @@ Expected<SmallVector<StringRef>> getDefaultDebuginfodUrls() {
   return DebuginfodUrls;
 }
 
+/// Finds a default local file caching directory for the debuginfod client,
+/// first checking DEBUGINFOD_CACHE_PATH.
 Expected<std::string> getDefaultDebuginfodCacheDirectory() {
   if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
     return CacheDirectoryEnv;
@@ -208,4 +224,293 @@ Expected<std::string> getCachedOrDownloadArtifact(
 
   return createStringError(errc::argument_out_of_domain, "build id not found");
 }
+
+DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
+    : Message(Message.str()) {}
+
+/// Convenience overload: wraps \p Message in a DebuginfodLogEntry and
+/// enqueues it.
+void DebuginfodLog::push(const Twine &Message) {
+  DebuginfodLogEntry Entry(Message);
+  push(std::move(Entry));
+}
+
+/// Appends \p Entry to the back of the queue and wakes one thread blocked in
+/// pop(), if any.
+void DebuginfodLog::push(DebuginfodLogEntry Entry) {
+  {
+    std::lock_guard<std::mutex> Guard(QueueMutex);
+    // Entry is a by-value sink parameter, so move it into the queue instead
+    // of copying its message string a second time.
+    LogEntryQueue.push(std::move(Entry));
+  }
+  // Notify after the lock is released so the woken consumer can take it
+  // straight away.
+  QueueCondition.notify_one();
+}
+
+/// Blocks until the queue holds at least one entry, then removes and returns
+/// the oldest one.
+DebuginfodLogEntry DebuginfodLog::pop() {
+  // Hold the same lock from the wait through the dequeue. Releasing it in
+  // between would let another consumer empty the queue after the predicate
+  // was observed to be true.
+  std::unique_lock<std::mutex> Guard(QueueMutex);
+  // Wait for messages to be pushed into the queue.
+  QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
+
+  // The predicate held while we own the lock, so front() is valid here.
+  DebuginfodLogEntry Entry = std::move(LogEntryQueue.front());
+  LogEntryQueue.pop();
+  return Entry;
+}
+
+/// Records the directories to scan together with the log, thread pool and
+/// minimum on-demand update interval. The constructor itself does not scan.
+DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
+                                           DebuginfodLog &Log, ThreadPool &Pool,
+                                           double MinInterval)
+    : Log(Log), Pool(Pool), MinInterval(MinInterval) {
+  // Store owned copies of the paths rather than the caller's StringRefs.
+  for (const StringRef &Dir : PathsRef)
+    Paths.emplace_back(Dir.str());
+}
+
+/// Rescans every configured directory while holding UpdateMutex.
+///
+/// The update timer is stopped and cleared before scanning and restarted only
+/// after every directory has been scanned successfully.
+///
+/// \returns the first error reported by findBinaries(), or success.
+Error DebuginfodCollection::update() {
+  std::lock_guard<sys::Mutex> UpdateGuard(UpdateMutex);
+  if (UpdateTimer.isRunning())
+    UpdateTimer.stopTimer();
+  UpdateTimer.clear();
+
+  for (const std::string &Dir : Paths) {
+    Log.push("Updating binaries at path " + Dir);
+    Error ScanErr = findBinaries(Dir);
+    if (ScanErr)
+      return ScanErr;
+  }
+
+  Log.push("Updated collection");
+  UpdateTimer.startTimer();
+  return Error::success();
+}
+
+/// Runs update() if at least MinInterval seconds of wall time have been
+/// accumulated on the update timer.
+///
+/// \returns false if the timer is not running or the interval has not yet
+/// elapsed, true if an update ran successfully, or the error from update().
+// NOTE(review): UpdateTimer is read and restarted here without holding
+// UpdateMutex - confirm whether callers can race with update().
+Expected<bool> DebuginfodCollection::updateIfStale() {
+  if (!UpdateTimer.isRunning())
+    return false;
+
+  // Stop the timer to sample the accumulated wall time, then resume it.
+  UpdateTimer.stopTimer();
+  const double ElapsedSeconds = UpdateTimer.getTotalTime().getWallTime();
+  UpdateTimer.startTimer();
+
+  const bool TooSoon = ElapsedSeconds < MinInterval;
+  if (TooSoon)
+    return false;
+
+  Error UpdateErr = update();
+  if (UpdateErr)
+    return std::move(UpdateErr);
+  return true;
+}
+
+/// Calls update() in an endless loop, sleeping for \p Interval after each
+/// successful pass.
+///
+/// \returns only when update() fails, with that error.
+Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
+  for (;;) {
+    Error UpdateErr = update();
+    if (UpdateErr)
+      return UpdateErr;
+    std::this_thread::sleep_for(Interval);
+  }
+  llvm_unreachable("updateForever loop should never end");
+}
+
+/// Returns true if \p Object exposes at least one DWARF info section.
+static bool isDebugBinary(object::ObjectFile *Object) {
+  // TODO: handle PDB debuginfo
+  std::unique_ptr<DWARFContext> DwarfCtx = DWARFContext::create(
+      *Object, DWARFContext::ProcessDebugRelocations::Process);
+  unsigned InfoSectionCount = 0;
+  DwarfCtx->getDWARFObj().forEachInfoSections(
+      [&InfoSectionCount](const DWARFSection &) { ++InfoSectionCount; });
+  return InfoSectionCount != 0;
+}
+
+/// Returns true if the magic of the file at \p FilePath identifies it as one
+/// of the ELF kinds. Returns false if the magic cannot be identified.
+static bool hasELFMagic(StringRef FilePath) {
+  file_magic Magic;
+  if (std::error_code MagicErr = identify_magic(FilePath, Magic))
+    return false;
+
+  bool IsELF = false;
+  switch (Magic) {
+  case file_magic::elf:
+  case file_magic::elf_relocatable:
+  case file_magic::elf_executable:
+  case file_magic::elf_shared_object:
+  case file_magic::elf_core:
+    IsELF = true;
+    break;
+  default:
+    break;
+  }
+  return IsELF;
+}
+
+/// Recursively walks \p Path and records every ELF object file that carries a
+/// build ID, keyed by that build ID, in DebugBinaries (if it has DWARF info
+/// sections) or Binaries (otherwise).
+///
+/// One task per pool thread is queued; all tasks pull paths from a single
+/// shared directory iterator guarded by IteratorMutex. Files that are not ELF,
+/// fail to parse, or lack a build ID are silently skipped.
+///
+/// \returns the directory-iteration error, if any, otherwise success.
+Error DebuginfodCollection::findBinaries(StringRef Path) {
+  std::error_code EC;
+  sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
+  // Protects I and EC, which are shared by all worker tasks below.
+  std::mutex IteratorMutex;
+  ThreadPoolTaskGroup IteratorGroup(Pool);
+  for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount();
+       WorkerIndex++) {
+    // The lambda captures locals by reference; this is safe only because
+    // IteratorGroup.wait() below runs before those locals go out of scope.
+    IteratorGroup.async([&, this]() -> void {
+      std::string FilePath;
+      while (true) {
+        {
+          // Check if iteration is over or there is an error during iteration
+          std::lock_guard<std::mutex> Guard(IteratorMutex);
+          if (I == E || EC)
+            return;
+          // Grab a file path from the directory iterator and advance the
+          // iterator.
+          FilePath = I->path();
+          I.increment(EC);
+        }
+
+        // Inspect the file at this path to determine if it is debuginfo.
+        if (!hasELFMagic(FilePath))
+          continue;
+
+        Expected<object::OwningBinary<object::Binary>> BinOrErr =
+            object::createBinary(FilePath);
+
+        // A file that cannot be parsed is skipped rather than failing the
+        // whole scan.
+        if (!BinOrErr) {
+          consumeError(BinOrErr.takeError());
+          continue;
+        }
+        // Bin is a non-owning pointer; BinOrErr keeps the binary alive for
+        // the rest of this iteration.
+        object::Binary *Bin = std::move(BinOrErr.get().getBinary());
+        if (!Bin->isObject())
+          continue;
+
+        // TODO: Support non-ELF binaries
+        object::ELFObjectFileBase *Object =
+            dyn_cast<object::ELFObjectFileBase>(Bin);
+        if (!Object)
+          continue;
+
+        Optional<BuildIDRef> ID = symbolize::getBuildID(Object);
+        if (!ID)
+          continue;
+
+        // Take the writer lock of whichever map this file belongs to. An
+        // existing entry for the same build ID is overwritten.
+        std::string IDString = buildIDToString(ID.getValue());
+        if (isDebugBinary(Object)) {
+          std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
+          DebugBinaries[IDString] = FilePath;
+        } else {
+          std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
+          Binaries[IDString] = FilePath;
+        }
+      }
+    });
+  }
+  // Block until every worker task has returned.
+  IteratorGroup.wait();
+  std::unique_lock<std::mutex> Guard(IteratorMutex);
+  if (EC)
+    return errorCodeToError(EC);
+  return Error::success();
+}
+
+/// Looks up \p ID in the map of non-debug binaries under a shared lock.
+///
+/// \returns the recorded path, or None if the build ID is not in the map.
+Expected<Optional<std::string>>
+DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
+  const std::string Key = buildIDToString(ID);
+  Log.push("getting binary path of ID " + Key);
+
+  Optional<std::string> Found;
+  std::shared_lock<sys::RWMutex> ReadGuard(BinariesMutex);
+  auto Entry = Binaries.find(Key);
+  if (Entry != Binaries.end())
+    Found = Entry->getValue();
+  return Found;
+}
+
+/// Looks up \p ID in the map of debug binaries under a shared lock.
+///
+/// \returns the recorded path, or None if the build ID is not in the map.
+Expected<Optional<std::string>>
+DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
+  const std::string Key = buildIDToString(ID);
+  Log.push("getting debug binary path of ID " + Key);
+
+  Optional<std::string> Found;
+  std::shared_lock<sys::RWMutex> ReadGuard(DebugBinariesMutex);
+  auto Entry = DebugBinaries.find(Key);
+  if (Entry != DebugBinaries.end())
+    Found = Entry->getValue();
+  return Found;
+}
+
+/// Resolves the path of the executable with build ID \p ID.
+///
+/// Lookup order: the local map of non-debug binaries, retried once if
+/// updateIfStale() refreshed the collection; then the federated debuginfod
+/// servers; and finally the debug binary with the same build ID.
+///
+/// \returns the path on success, or the error from the collection update or
+/// from findDebugBinaryPath().
+Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
+  {
+    // Check collection; perform on-demand update if stale.
+    Expected<Optional<std::string>> PathOrErr = getBinaryPath(ID);
+    if (!PathOrErr)
+      return PathOrErr.takeError();
+    Optional<std::string> Path = *PathOrErr;
+    if (!Path) {
+      Expected<bool> UpdatedOrErr = updateIfStale();
+      if (!UpdatedOrErr)
+        return UpdatedOrErr.takeError();
+      if (*UpdatedOrErr) {
+        // Try once more.
+        PathOrErr = getBinaryPath(ID);
+        if (!PathOrErr)
+          return PathOrErr.takeError();
+        Path = *PathOrErr;
+      }
+    }
+    if (Path)
+      return Path.getValue();
+  }
+
+  // Try federation. A successful download is the answer, so return it instead
+  // of falling through to the debug-binary lookup.
+  Expected<std::string> FederatedPathOrErr = getCachedOrDownloadExecutable(ID);
+  if (FederatedPathOrErr)
+    return FederatedPathOrErr;
+  // Federation failing is not fatal; drop the error and keep looking.
+  consumeError(FederatedPathOrErr.takeError());
+
+  // Fall back to debug binary.
+  return findDebugBinaryPath(ID);
+}
+
+/// Resolves the path of the debuginfo file with build ID \p ID.
+///
+/// Lookup order: the local map of debug binaries, retried once if
+/// updateIfStale() refreshed the collection; then the federated debuginfod
+/// servers.
+///
+/// \returns the path on success, or the error from the collection update or
+/// from the federated lookup.
+Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
+  // Check collection; perform on-demand update if stale.
+  Expected<Optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
+  if (!PathOrErr)
+    return PathOrErr.takeError();
+  Optional<std::string> Path = *PathOrErr;
+  if (!Path) {
+    Expected<bool> UpdatedOrErr = updateIfStale();
+    if (!UpdatedOrErr)
+      return UpdatedOrErr.takeError();
+    if (*UpdatedOrErr) {
+      // Try once more. This must query the debug-binary map again: looking in
+      // the non-debug map here could hand back a binary without debuginfo.
+      PathOrErr = getDebugBinaryPath(ID);
+      if (!PathOrErr)
+        return PathOrErr.takeError();
+      Path = *PathOrErr;
+    }
+  }
+  if (Path)
+    return Path.getValue();
+
+  // Try federation.
+  return getCachedOrDownloadDebuginfo(ID);
+}
+
+/// Registers the two GET routes of the debuginfod protocol on Server.
+///
+/// Both handlers log the request, decode the hex build ID captured from the
+/// URL, resolve it through \p Collection and stream the resulting file. A
+/// build ID that is not valid hex, or that cannot be resolved, yields a 404
+/// plain-text response.
+DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
+                                   DebuginfodCollection &Collection)
+    : Log(Log), Collection(Collection) {
+  // Debuginfo endpoint.
+  cantFail(
+      Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
+        Log.push("GET " + Request.UrlPath);
+        std::string RawID;
+        const bool IsHex = tryGetFromHex(Request.UrlPathMatches[0], RawID);
+        if (!IsHex) {
+          Request.setResponse(
+              {404, "text/plain", "Build ID is not a hex string\n"});
+          return;
+        }
+        BuildID ID(RawID.begin(), RawID.end());
+        Expected<std::string> FoundOrErr =
+            Collection.findDebugBinaryPath(ID);
+        if (!FoundOrErr) {
+          consumeError(FoundOrErr.takeError());
+          Request.setResponse({404, "text/plain", "Build ID not found\n"});
+          return;
+        }
+        streamFile(Request, *FoundOrErr);
+      }));
+  // Executable endpoint.
+  cantFail(
+      Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
+        Log.push("GET " + Request.UrlPath);
+        std::string RawID;
+        const bool IsHex = tryGetFromHex(Request.UrlPathMatches[0], RawID);
+        if (!IsHex) {
+          Request.setResponse(
+              {404, "text/plain", "Build ID is not a hex string\n"});
+          return;
+        }
+        BuildID ID(RawID.begin(), RawID.end());
+        Expected<std::string> FoundOrErr = Collection.findBinaryPath(ID);
+        if (!FoundOrErr) {
+          consumeError(FoundOrErr.takeError());
+          Request.setResponse({404, "text/plain", "Build ID not found\n"});
+          return;
+        }
+        streamFile(Request, *FoundOrErr);
+      }));
+}
+
 } // namespace llvm


        


More information about the llvm-commits mailing list