[llvm] babef90 - [llvm] [Debuginfod] DebuginfodCollection and DebuginfodServer for tracking local debuginfo.
Noah Shutty via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 6 13:02:18 PDT 2022
Author: Noah Shutty
Date: 2022-07-06T20:02:14Z
New Revision: babef908cc135c05625d7b36e38b55115ec9dc1e
URL: https://github.com/llvm/llvm-project/commit/babef908cc135c05625d7b36e38b55115ec9dc1e
DIFF: https://github.com/llvm/llvm-project/commit/babef908cc135c05625d7b36e38b55115ec9dc1e.diff
LOG: [llvm] [Debuginfod] DebuginfodCollection and DebuginfodServer for tracking local debuginfo.
This library implements the class `DebuginfodCollection`, which scans a set of directories for binaries, classifying them according to whether they contain debuginfo. This also provides the `DebuginfodServer`, an `HTTPServer` which serves debuginfod's `/debuginfo` and `/executable` endpoints. This is intended as the final new supporting library required for `llvm-debuginfod`.
As implemented here, `DebuginfodCollection` only finds ELF binaries and DWARF debuginfo. All other files are ignored. However, the class interface is format-agnostic. Generalizing to support other platforms will require refactoring of LLVM's object parsing libraries to eliminate use of `report_fatal_error` (e.g. when reading WASM files, see https://github.com/llvm/llvm-project/blob/main/llvm/lib/Object/WasmObjectFile.cpp#L74), so that the debuginfod daemon does not crash when it encounters a malformed file on the disk.
The `DebuginfodCollection` is tested by end-to-end tests of the debuginfod server (D114846).
Reviewed By: mysterymath
Differential Revision: https://reviews.llvm.org/D114845
Added:
Modified:
llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
llvm/include/llvm/Debuginfod/Debuginfod.h
llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
llvm/lib/Debuginfod/CMakeLists.txt
llvm/lib/Debuginfod/Debuginfod.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 00c4bf0a615f1..f233a183912b2 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -243,6 +243,8 @@ class CachedBinary : public ilist_node<CachedBinary> {
std::function<void()> Evictor;
};
+/// Returns the build ID of the ELF object file \p Obj, or an empty Optional if
+/// no build ID was obtained for it.
+Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj);
+
} // end namespace symbolize
} // end namespace llvm
diff --git a/llvm/include/llvm/Debuginfod/Debuginfod.h b/llvm/include/llvm/Debuginfod/Debuginfod.h
index 064cfa75b1a1b..efa460466fc53 100644
--- a/llvm/include/llvm/Debuginfod/Debuginfod.h
+++ b/llvm/include/llvm/Debuginfod/Debuginfod.h
@@ -7,23 +7,31 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file contains the declarations of getCachedOrDownloadArtifact and
-/// several convenience functions for specific artifact types:
-/// getCachedOrDownloadSource, getCachedOrDownloadExecutable, and
-/// getCachedOrDownloadDebuginfo. This file also declares
-/// getDefaultDebuginfodUrls and getDefaultDebuginfodCacheDirectory.
-///
+/// This file contains several declarations for the debuginfod client and
+/// server. The client functions are getDefaultDebuginfodUrls,
+/// getCachedOrDownloadArtifact, and several convenience functions for specific
+/// artifact types: getCachedOrDownloadSource, getCachedOrDownloadExecutable,
+/// and getCachedOrDownloadDebuginfo. For the server, this file declares the
+/// DebuginfodLogEntry and DebuginfodServer structs, as well as the
+/// DebuginfodLog and DebuginfodCollection classes.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFOD_DEBUGINFOD_H
#define LLVM_DEBUGINFOD_DEBUGINFOD_H
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Debuginfod/HTTPServer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/RWMutex.h"
+#include "llvm/Support/Timer.h"
#include <chrono>
+#include <condition_variable>
+#include <mutex>
+#include <queue>
namespace llvm {
@@ -68,6 +76,68 @@ Expected<std::string> getCachedOrDownloadArtifact(
StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout);
+class ThreadPool;
+
+/// A single message stored in a DebuginfodLog.
+struct DebuginfodLogEntry {
+ // The text of the log message.
+ std::string Message;
+ DebuginfodLogEntry() = default;
+ // Builds an entry whose Message is the string form of the given Twine.
+ DebuginfodLogEntry(const Twine &Message);
+};
+
+/// A queue of log entries protected by a mutex. A condition variable lets
+/// pop() block until an entry has been pushed.
+class DebuginfodLog {
+ // Guards LogEntryQueue.
+ std::mutex QueueMutex;
+ // Notified by push() each time an entry is added.
+ std::condition_variable QueueCondition;
+ std::queue<DebuginfodLogEntry> LogEntryQueue;
+
+public:
+ // Adds a log entry to end of the queue.
+ void push(DebuginfodLogEntry Entry);
+ // Wraps the message in a DebuginfodLogEntry and adds it to the end of the
+ // queue.
+ void push(const Twine &Message);
+ // Blocks until there are log entries in the queue, then pops and returns the
+ // first one.
+ DebuginfodLogEntry pop();
+};
+
+/// Tracks a collection of debuginfod artifacts on the local filesystem.
+class DebuginfodCollection {
+ // Directories that update() scans, copied from the constructor argument.
+ SmallVector<std::string, 1> Paths;
+ // Guards Binaries.
+ sys::RWMutex BinariesMutex;
+ // Maps a build ID string to the path of a scanned ELF file in which no DWARF
+ // info section was found.
+ StringMap<std::string> Binaries;
+ // Guards DebugBinaries.
+ sys::RWMutex DebugBinariesMutex;
+ // Maps a build ID string to the path of a scanned ELF file that has at least
+ // one DWARF info section.
+ StringMap<std::string> DebugBinaries;
+ // Recursively scans the directory at the given path and records the ELF
+ // files found there in Binaries or DebugBinaries.
+ Error findBinaries(StringRef Path);
+ // Looks up the build ID in DebugBinaries; yields None if it is absent.
+ Expected<Optional<std::string>> getDebugBinaryPath(BuildIDRef);
+ // Looks up the build ID in Binaries; yields None if it is absent.
+ Expected<Optional<std::string>> getBinaryPath(BuildIDRef);
+ // If an update has completed and at least MinInterval seconds have elapsed
+ // since then, call update() and return true. Otherwise return false. If
+ // update returns an error, return the error.
+ Expected<bool> updateIfStale();
+ DebuginfodLog &Log;
+ // Thread pool whose workers perform the directory scan in findBinaries.
+ ThreadPool &Pool;
+ // Restarted at the end of each successful update(); read by updateIfStale().
+ Timer UpdateTimer;
+ // Held for the duration of update().
+ sys::Mutex UpdateMutex;
+
+ // Minimum update interval, in seconds, for on-demand updates triggered when a
+ // build-id is not found.
+ double MinInterval;
+
+public:
+ DebuginfodCollection(ArrayRef<StringRef> Paths, DebuginfodLog &Log,
+ ThreadPool &Pool, double MinInterval);
+ // Rescans every directory in Paths, stopping at the first error.
+ Error update();
+ // Calls update() and then sleeps for Interval, repeatedly. Returns only when
+ // update() reports an error.
+ Error updateForever(std::chrono::milliseconds Interval);
+ // Finds a file to serve as debuginfo for the build ID: the local collection
+ // is consulted first, then the debuginfod client (federation).
+ Expected<std::string> findDebugBinaryPath(BuildIDRef);
+ // Finds a file to serve as the executable for the build ID; may fall back to
+ // findDebugBinaryPath when the local collection has no executable for it.
+ Expected<std::string> findBinaryPath(BuildIDRef);
+};
+
+/// Bundles an HTTPServer with the log and collection its request handlers use.
+struct DebuginfodServer {
+ HTTPServer Server;
+ // Held by reference: the log and the collection are owned by the caller.
+ DebuginfodLog &Log;
+ DebuginfodCollection &Collection;
+ // Registers the /buildid/<id>/debuginfo and /buildid/<id>/executable GET
+ // handlers on Server.
+ DebuginfodServer(DebuginfodLog &Log, DebuginfodCollection &Collection);
+};
+
} // end namespace llvm
#endif
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index d2ff8aa7c995f..c239d4c260ec9 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -327,6 +327,8 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFFile<ELFT> &Obj) {
return {};
}
+} // end anonymous namespace
+
Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) {
Optional<ArrayRef<uint8_t>> BuildID;
if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Obj))
@@ -342,8 +344,6 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) {
return BuildID;
}
-} // end anonymous namespace
-
ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
const MachOObjectFile *MachExeObj,
const std::string &ArchName) {
diff --git a/llvm/lib/Debuginfod/CMakeLists.txt b/llvm/lib/Debuginfod/CMakeLists.txt
index 06f7441280a91..f5ea9e99622a0 100644
--- a/llvm/lib/Debuginfod/CMakeLists.txt
+++ b/llvm/lib/Debuginfod/CMakeLists.txt
@@ -25,4 +25,5 @@ add_llvm_library(LLVMDebuginfod
LINK_COMPONENTS
Support
Symbolize
+ DebugInfoDWARF
)
diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp
index 7b1c36fdbe09c..bd54d698eb269 100644
--- a/llvm/lib/Debuginfod/Debuginfod.cpp
+++ b/llvm/lib/Debuginfod/Debuginfod.cpp
@@ -8,25 +8,39 @@
///
/// \file
///
-/// This file defines the fetchInfo function, which retrieves
-/// any of the three supported artifact types: (executable, debuginfo, source
-/// file) associated with a build-id from debuginfod servers. If a source file
-/// is to be fetched, its absolute path must be specified in the Description
-/// argument to fetchInfo.
+/// This file contains several definitions for the debuginfod client and server.
+/// For the client, this file defines the fetchInfo function. For the server,
+/// this file defines the DebuginfodLogEntry and DebuginfodServer structs, as
+/// well as the DebuginfodLog and DebuginfodCollection classes. The fetchInfo
+/// function retrieves any of the three supported artifact types: (executable,
+/// debuginfo, source file) associated with a build-id from debuginfod servers.
+/// If a source file is to be fetched, its absolute path must be specified in
+/// the Description argument to fetchInfo. The DebuginfodLogEntry,
+/// DebuginfodLog, and DebuginfodCollection are used by the DebuginfodServer to
+/// scan the local filesystem for binaries and serve the debuginfod protocol.
///
//===----------------------------------------------------------------------===//
#include "llvm/Debuginfod/Debuginfod.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Debuginfod/HTTPClient.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CachePruning.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/xxhash.h"
+#include <atomic>
+
namespace llvm {
static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); }
@@ -46,6 +60,8 @@ Expected<SmallVector<StringRef>> getDefaultDebuginfodUrls() {
return DebuginfodUrls;
}
+/// Finds a default local file caching directory for the debuginfod client,
+/// first checking DEBUGINFOD_CACHE_PATH.
Expected<std::string> getDefaultDebuginfodCacheDirectory() {
if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
return CacheDirectoryEnv;
@@ -208,4 +224,293 @@ Expected<std::string> getCachedOrDownloadArtifact(
return createStringError(errc::argument_out_of_domain, "build id not found");
}
+
+/// Stores the rendered text of \p Message in the new entry.
+DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message) {
+  this->Message = Message.str();
+}
+
+/// Convenience overload: wraps \p Message in a log entry and enqueues it.
+void DebuginfodLog::push(const Twine &Message) {
+  DebuginfodLogEntry Entry(Message);
+  push(std::move(Entry));
+}
+
+/// Appends \p Entry to the end of the queue and notifies one thread waiting on
+/// the queue's condition variable.
+void DebuginfodLog::push(DebuginfodLogEntry Entry) {
+  {
+    std::lock_guard<std::mutex> Guard(QueueMutex);
+    // Entry is a by-value sink parameter, so move it into the queue instead of
+    // copying the message string a second time.
+    LogEntryQueue.push(std::move(Entry));
+  }
+  // The lock is released before notifying so the woken thread can acquire it
+  // without contending with this one.
+  QueueCondition.notify_one();
+}
+
+/// Blocks until the queue is non-empty, then removes and returns the entry at
+/// the front.
+DebuginfodLogEntry DebuginfodLog::pop() {
+  // Hold a single lock across both the wait and the dequeue. Releasing it in
+  // between would allow another consumer to drain the queue after this thread
+  // was woken, so the queue could be empty again by the time it is read.
+  std::unique_lock<std::mutex> Guard(QueueMutex);
+  // Wait for messages to be pushed into the queue. The predicate is evaluated
+  // with the lock held, so the queue is non-empty when wait() returns.
+  QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
+
+  DebuginfodLogEntry Entry = std::move(LogEntryQueue.front());
+  LogEntryQueue.pop();
+  return Entry;
+}
+
+/// Stores the log, thread pool and minimum update interval, and copies every
+/// entry of \p PathsRef into the collection's own list of directories.
+DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
+                                           DebuginfodLog &Log, ThreadPool &Pool,
+                                           double MinInterval)
+    : Log(Log), Pool(Pool), MinInterval(MinInterval) {
+  // Keep owned std::string copies rather than the caller's StringRefs.
+  for (const StringRef *It = PathsRef.begin(), *End = PathsRef.end();
+       It != End; ++It)
+    Paths.emplace_back(It->str());
+}
+
+/// Rescans every directory in Paths and restarts the update timer.
+///
+/// UpdateMutex is held for the whole pass. Returns the first error reported by
+/// findBinaries; in that case the timer is left stopped and cleared.
+Error DebuginfodCollection::update() {
+  std::lock_guard<sys::Mutex> Lock(UpdateMutex);
+
+  // Discard whatever time the timer accumulated since the previous pass.
+  if (UpdateTimer.isRunning())
+    UpdateTimer.stopTimer();
+  UpdateTimer.clear();
+
+  for (const std::string &Dir : Paths) {
+    Log.push("Updating binaries at path " + Dir);
+    Error ScanErr = findBinaries(Dir);
+    if (ScanErr)
+      return ScanErr;
+  }
+
+  Log.push("Updated collection");
+  UpdateTimer.startTimer();
+  return Error::success();
+}
+
+/// Runs update() if the timer shows that at least MinInterval seconds of wall
+/// time have elapsed since it was last started.
+///
+/// \returns true if an update ran and succeeded, false if no update was
+/// attempted, or the error produced by update().
+Expected<bool> DebuginfodCollection::updateIfStale() {
+  // NOTE(review): UpdateTimer is read and toggled here without holding
+  // UpdateMutex, while update() modifies it under that mutex. Confirm that
+  // callers cannot race on it.
+
+  // update() starts the timer only at the end of a successful pass; a stopped
+  // timer means there is nothing to measure against.
+  if (!UpdateTimer.isRunning())
+    return false;
+
+  // Stop the timer briefly to sample it, then let it continue.
+  UpdateTimer.stopTimer();
+  const double Elapsed = UpdateTimer.getTotalTime().getWallTime();
+  UpdateTimer.startTimer();
+
+  if (!(Elapsed < MinInterval)) {
+    Error UpdateErr = update();
+    if (UpdateErr)
+      return std::move(UpdateErr);
+    return true;
+  }
+  return false;
+}
+
+/// Repeatedly calls update() and then sleeps for \p Interval.
+///
+/// Returns only when update() fails, handing that error to the caller.
+Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
+  for (;;) {
+    Error UpdateErr = update();
+    if (UpdateErr)
+      return UpdateErr;
+    std::this_thread::sleep_for(Interval);
+  }
+  llvm_unreachable("updateForever loop should never end");
+}
+
+/// Returns true if the DWARF context built for \p Object reports at least one
+/// info section.
+static bool isDebugBinary(object::ObjectFile *Object) {
+  // TODO: handle PDB debuginfo
+  std::unique_ptr<DWARFContext> DwarfCtx = DWARFContext::create(
+      *Object, DWARFContext::ProcessDebugRelocations::Process);
+  unsigned InfoSectionCount = 0;
+  DwarfCtx->getDWARFObj().forEachInfoSections(
+      [&InfoSectionCount](const DWARFSection &) { ++InfoSectionCount; });
+  return InfoSectionCount != 0;
+}
+
+/// Returns true if the file at \p FilePath is identified as one of the ELF
+/// file_magic kinds. Returns false if the magic cannot be read.
+static bool hasELFMagic(StringRef FilePath) {
+  file_magic Magic;
+  if (identify_magic(FilePath, Magic))
+    return false;
+  return Magic == file_magic::elf || Magic == file_magic::elf_relocatable ||
+         Magic == file_magic::elf_executable ||
+         Magic == file_magic::elf_shared_object ||
+         Magic == file_magic::elf_core;
+}
+
+/// Recursively walks the directory at \p Path and records every ELF object
+/// file that has a build ID. Files for which isDebugBinary() is true go into
+/// DebugBinaries, the rest into Binaries, keyed by the build ID string.
+///
+/// One task per pool thread is submitted; all tasks pull paths from a single
+/// shared directory iterator. Files that are not ELF, fail to parse, or have
+/// no build ID are skipped without reporting an error.
+///
+/// \returns the directory-iteration error if one occurred, otherwise success.
+Error DebuginfodCollection::findBinaries(StringRef Path) {
+ std::error_code EC;
+ sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
+ // Guards I and EC, which are shared by all worker tasks below.
+ std::mutex IteratorMutex;
+ ThreadPoolTaskGroup IteratorGroup(Pool);
+ for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount();
+ WorkerIndex++) {
+ IteratorGroup.async([&, this]() -> void {
+ std::string FilePath;
+ while (true) {
+ {
+ // Check if iteration is over or there is an error during iteration
+ std::lock_guard<std::mutex> Guard(IteratorMutex);
+ if (I == E || EC)
+ return;
+ // Grab a file path from the directory iterator and advance the
+ // iterator.
+ FilePath = I->path();
+ I.increment(EC);
+ }
+
+ // The file is inspected outside the iterator lock, so workers can examine
+ // different files at the same time.
+ // Inspect the file at this path to determine if it is debuginfo.
+ if (!hasELFMagic(FilePath))
+ continue;
+
+ Expected<object::OwningBinary<object::Binary>> BinOrErr =
+ object::createBinary(FilePath);
+
+ // A file that cannot be parsed is skipped; its error is discarded.
+ if (!BinOrErr) {
+ consumeError(BinOrErr.takeError());
+ continue;
+ }
+ // Bin is a plain pointer into the binary still held by BinOrErr.
+ object::Binary *Bin = std::move(BinOrErr.get().getBinary());
+ if (!Bin->isObject())
+ continue;
+
+ // TODO: Support non-ELF binaries
+ object::ELFObjectFileBase *Object =
+ dyn_cast<object::ELFObjectFileBase>(Bin);
+ if (!Object)
+ continue;
+
+ Optional<BuildIDRef> ID = symbolize::getBuildID(Object);
+ if (!ID)
+ continue;
+
+ // An existing entry for the same build ID is overwritten with this path.
+ std::string IDString = buildIDToString(ID.getValue());
+ if (isDebugBinary(Object)) {
+ std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
+ DebugBinaries[IDString] = FilePath;
+ } else {
+ std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
+ Binaries[IDString] = FilePath;
+ }
+ }
+ });
+ }
+ // Block until every worker task has returned before reading EC.
+ IteratorGroup.wait();
+ std::unique_lock<std::mutex> Guard(IteratorMutex);
+ if (EC)
+ return errorCodeToError(EC);
+ return Error::success();
+}
+
+/// Logs the lookup, then searches Binaries for \p ID under a shared lock.
+///
+/// \returns the stored path, or None if the build ID is not in Binaries.
+Expected<Optional<std::string>>
+DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
+  const std::string IDString = buildIDToString(ID);
+  Log.push("getting binary path of ID " + IDString);
+
+  std::shared_lock<sys::RWMutex> ReadLock(BinariesMutex);
+  auto Entry = Binaries.find(IDString);
+  if (Entry == Binaries.end())
+    return None;
+  // Copy the path out while the lock is still held.
+  std::string Found = Entry->getValue();
+  return Found;
+}
+
+/// Logs the lookup, then searches DebugBinaries for \p ID under a shared lock.
+///
+/// \returns the stored path, or None if the build ID is not in DebugBinaries.
+Expected<Optional<std::string>>
+DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
+  const std::string IDString = buildIDToString(ID);
+  Log.push("getting debug binary path of ID " + IDString);
+
+  std::shared_lock<sys::RWMutex> ReadLock(DebugBinariesMutex);
+  auto Entry = DebugBinaries.find(IDString);
+  if (Entry == DebugBinaries.end())
+    return None;
+  // Copy the path out while the lock is still held.
+  std::string Found = Entry->getValue();
+  return Found;
+}
+
+/// Finds a file to serve as the executable for build ID \p ID.
+///
+/// Sources are tried in order:
+///   1. the Binaries map, retried once if an on-demand update ran;
+///   2. federation via getCachedOrDownloadExecutable();
+///   3. findDebugBinaryPath() as a last resort.
+///
+/// \returns the path found, or an error from the on-demand update or from
+/// findDebugBinaryPath().
+Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
+  {
+    // Check collection; perform on-demand update if stale.
+    Expected<Optional<std::string>> PathOrErr = getBinaryPath(ID);
+    if (!PathOrErr)
+      return PathOrErr.takeError();
+    Optional<std::string> Path = *PathOrErr;
+    if (!Path) {
+      Expected<bool> UpdatedOrErr = updateIfStale();
+      if (!UpdatedOrErr)
+        return UpdatedOrErr.takeError();
+      if (*UpdatedOrErr) {
+        // Try once more.
+        PathOrErr = getBinaryPath(ID);
+        if (!PathOrErr)
+          return PathOrErr.takeError();
+        Path = *PathOrErr;
+      }
+    }
+    if (Path)
+      return Path.getValue();
+  }
+
+  // Try federation. A successful fetch is the answer, so return it instead of
+  // discarding it; only a failed fetch falls through to the debug binary.
+  Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
+  if (PathOrErr)
+    return std::move(*PathOrErr);
+  consumeError(PathOrErr.takeError());
+
+  // Fall back to debug binary.
+  return findDebugBinaryPath(ID);
+}
+
+/// Finds a file to serve as debuginfo for build ID \p ID.
+///
+/// The DebugBinaries map is checked first and, if an on-demand update ran,
+/// checked once more. If the ID is still unknown, the lookup is delegated to
+/// getCachedOrDownloadDebuginfo() (federation).
+///
+/// \returns the path found, or an error from the on-demand update or from the
+/// federated lookup.
+Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
+  // Check collection; perform on-demand update if stale.
+  Expected<Optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
+  if (!PathOrErr)
+    return PathOrErr.takeError();
+  Optional<std::string> Path = *PathOrErr;
+  if (!Path) {
+    Expected<bool> UpdatedOrErr = updateIfStale();
+    if (!UpdatedOrErr)
+      return UpdatedOrErr.takeError();
+    if (*UpdatedOrErr) {
+      // Try once more. The retry must query the debug map again; looking in
+      // the non-debug map here would return an executable as debuginfo.
+      PathOrErr = getDebugBinaryPath(ID);
+      if (!PathOrErr)
+        return PathOrErr.takeError();
+      Path = *PathOrErr;
+    }
+  }
+  if (Path)
+    return Path.getValue();
+
+  // Try federation.
+  return getCachedOrDownloadDebuginfo(ID);
+}
+
+/// Registers the two GET handlers served by the debuginfod server:
+/// /buildid/<id>/debuginfo and /buildid/<id>/executable.
+///
+/// Both handlers log the request, decode the hex build ID captured from the
+/// URL, resolve it through \p Collection, and stream the resulting file. A
+/// build ID that is not valid hex, or that cannot be resolved, produces a 404
+/// plain-text response.
+///
+/// The handlers capture by reference, so \p Log and \p Collection must remain
+/// valid for as long as the handlers can run.
+DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
+ DebuginfodCollection &Collection)
+ : Log(Log), Collection(Collection) {
+ // cantFail: a failure to register a handler is treated as a programming
+ // error rather than a recoverable condition.
+ cantFail(
+ Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
+ Log.push("GET " + Request.UrlPath);
+ std::string IDString;
+ // UrlPathMatches[0] is the text captured by the (.*) group above.
+ if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
+ Request.setResponse(
+ {404, "text/plain", "Build ID is not a hex string\n"});
+ return;
+ }
+ BuildID ID(IDString.begin(), IDString.end());
+ Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
+ // The lookup error is dropped; the client only sees the 404.
+ if (Error Err = PathOrErr.takeError()) {
+ consumeError(std::move(Err));
+ Request.setResponse({404, "text/plain", "Build ID not found\n"});
+ return;
+ }
+ streamFile(Request, *PathOrErr);
+ }));
+ // Same flow as above, resolved through findBinaryPath instead.
+ cantFail(
+ Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
+ Log.push("GET " + Request.UrlPath);
+ std::string IDString;
+ if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
+ Request.setResponse(
+ {404, "text/plain", "Build ID is not a hex string\n"});
+ return;
+ }
+ BuildID ID(IDString.begin(), IDString.end());
+ Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
+ if (Error Err = PathOrErr.takeError()) {
+ consumeError(std::move(Err));
+ Request.setResponse({404, "text/plain", "Build ID not found\n"});
+ return;
+ }
+ streamFile(Request, *PathOrErr);
+ }));
+}
+
} // namespace llvm
More information about the llvm-commits
mailing list