[llvm] [DLCov] Origin-Tracking: Enable collecting and symbolizing stack traces (PR #143591)
Stephen Tozer via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 08:05:00 PDT 2025
https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/143591
>From 46c1770170ceab18ec5576c2fd999981b55e9dd0 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Tue, 10 Jun 2025 19:58:09 +0100
Subject: [PATCH] [DLCov] Origin-Tracking: SymbolizeAddresses
---
llvm/include/llvm/Support/Signals.h | 37 +++++
llvm/lib/Support/Signals.cpp | 201 ++++++++++++++++++---------
llvm/lib/Support/Unix/Signals.inc | 15 ++
llvm/lib/Support/Windows/Signals.inc | 5 +
4 files changed, 195 insertions(+), 63 deletions(-)
diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h
index 6ce26acdd458e..18df195bacd81 100644
--- a/llvm/include/llvm/Support/Signals.h
+++ b/llvm/include/llvm/Support/Signals.h
@@ -14,10 +14,25 @@
#ifndef LLVM_SUPPORT_SIGNALS_H
#define LLVM_SUPPORT_SIGNALS_H
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Compiler.h"
#include <cstdint>
#include <string>
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+// std::array appears in the getStackTrace declaration further down this header.
+#include <array>
+namespace llvm {
+// Typedefs that are convenient but only used by the stack-trace-collection code
+// added if DebugLoc origin-tracking is enabled.
+using AddressSet = DenseSet<void *>;
+using SymbolizedAddressMap = DenseMap<void *, SmallVector<std::string, 0>>;
+} // namespace llvm
+#endif
+
namespace llvm {
class StringRef;
class raw_ostream;
@@ -57,6 +72,28 @@ LLVM_ABI void DisableSystemDialogsOnCrash();
/// specified, the entire frame is printed.
LLVM_ABI void PrintStackTrace(raw_ostream &OS, int Depth = 0);
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+#ifdef NDEBUG
+#error DebugLoc origin-tracking should not be enabled in Release builds.
+#endif
+/// Populates the given array with a stack trace of the current program, up to
+/// MaxDepth frames. Returns the number of frames captured, which will be
+/// inserted into \p StackTrace from index 0. All entries after the returned
+/// depth will be unmodified. NB: This is only intended to be used for
+/// introspection of LLVM by Debugify, will not be enabled in release builds,
+/// and should not be relied on for other purposes.
+template <unsigned long MaxDepth>
+int getStackTrace(std::array<void *, MaxDepth> &StackTrace);
+
+/// Takes a set of \p Addresses, symbolizes them and stores the result in the
+/// provided \p SymbolizedAddresses map.
+/// NB: This is only intended to be used for introspection of LLVM by
+/// Debugify, will not be enabled in release builds, and should not be relied
+/// on for other purposes.
+void symbolizeAddresses(AddressSet &Addresses,
+ SymbolizedAddressMap &SymbolizedAddresses);
+#endif
+
// Run all registered signal handlers.
LLVM_ABI void RunSignalHandlers();
diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp
index 9f9030e79d104..a9c61f23497a5 100644
--- a/llvm/lib/Support/Signals.cpp
+++ b/llvm/lib/Support/Signals.cpp
@@ -31,7 +31,6 @@
#include "llvm/Support/raw_ostream.h"
#include <array>
#include <cmath>
-#include <vector>
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only TRULY operating system
@@ -137,47 +136,28 @@ static FormattedNumber format_ptr(void *PC) {
return format_hex((uint64_t)PC, PtrWidth);
}
-/// Helper that launches llvm-symbolizer and symbolizes a backtrace.
-LLVM_ATTRIBUTE_USED
-static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
- int Depth, llvm::raw_ostream &OS) {
- if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv))
- return false;
-
- // Don't recursively invoke the llvm-symbolizer binary.
- if (Argv0.contains("llvm-symbolizer"))
- return false;
-
- // FIXME: Subtract necessary number from StackTrace entries to turn return addresses
- // into actual instruction addresses.
- // Use llvm-symbolizer tool to symbolize the stack traces. First look for it
- // alongside our binary, then in $PATH.
- ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code();
- if (const char *Path = getenv(LLVMSymbolizerPathEnv)) {
- LLVMSymbolizerPathOrErr = sys::findProgramByName(Path);
- } else if (!Argv0.empty()) {
- StringRef Parent = llvm::sys::path::parent_path(Argv0);
- if (!Parent.empty())
- LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer", Parent);
- }
- if (!LLVMSymbolizerPathOrErr)
- LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer");
- if (!LLVMSymbolizerPathOrErr)
- return false;
- const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
-
- // If we don't know argv0 or the address of main() at this point, try
- // to guess it anyway (it's possible on some platforms).
- std::string MainExecutableName =
- sys::fs::exists(Argv0) ? (std::string)std::string(Argv0)
- : sys::fs::getMainExecutable(nullptr, nullptr);
+/// Runs llvm-symbolizer (found at \p LLVMSymbolizerPath) over the
+/// \p AddressCount addresses in \p AddressList, reads back the function names
+/// and source locations it writes, and returns the strings in a vector of
+/// pairs, where the first pair element is the index of the corresponding entry
+/// in AddressList and the second is the symbolized frame, in a format based on
+/// the sanitizer stack trace printer, except that frame numbers (i.e. "#2 "
+/// for the third address) are not written, as it is not assumed that
+/// \p AddressList corresponds to a single stack trace. Returns std::nullopt on
+/// failure. There may be multiple returned entries for a single \p AddressList
+/// entry if that frame address corresponds to one or more inlined frames; in
+/// this case, all frames for an address will appear contiguously and in-order.
+static std::optional<SmallVector<std::pair<unsigned, std::string>, 0>>
+collectAddressSymbols(void **AddressList, unsigned AddressCount,
+                      const char *MainExecutableName,
+                      const std::string &LLVMSymbolizerPath) {
BumpPtrAllocator Allocator;
StringSaver StrPool(Allocator);
- std::vector<const char *> Modules(Depth, nullptr);
- std::vector<intptr_t> Offsets(Depth, 0);
- if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(),
- MainExecutableName.c_str(), StrPool))
- return false;
+ SmallVector<const char *, 0> Modules(AddressCount, nullptr);
+ SmallVector<intptr_t, 0> Offsets(AddressCount, 0);
+ if (!findModulesAndOffsets(AddressList, AddressCount, Modules.data(), Offsets.data(),
+ MainExecutableName, StrPool))
+ return {};
int InputFD;
SmallString<32> InputFile, OutputFile;
sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile);
@@ -187,9 +167,9 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
{
raw_fd_ostream Input(InputFD, true);
- for (int i = 0; i < Depth; i++) {
- if (Modules[i])
- Input << Modules[i] << " " << (void*)Offsets[i] << "\n";
+ for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) {
+ if (Modules[AddrIdx])
+ Input << Modules[AddrIdx] << " " << (void*)Offsets[AddrIdx] << "\n";
}
}
@@ -206,53 +186,148 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
int RunResult =
sys::ExecuteAndWait(LLVMSymbolizerPath, Args, std::nullopt, Redirects);
if (RunResult != 0)
- return false;
+ return {};
- // This report format is based on the sanitizer stack trace printer. See
- // sanitizer_stacktrace_printer.cc in compiler-rt.
+
+ SmallVector<std::pair<unsigned, std::string>, 0> Result;
auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str());
if (!OutputBuf)
- return false;
+ return {};
StringRef Output = OutputBuf.get()->getBuffer();
SmallVector<StringRef, 32> Lines;
Output.split(Lines, "\n");
- auto CurLine = Lines.begin();
- int frame_no = 0;
- for (int i = 0; i < Depth; i++) {
- auto PrintLineHeader = [&]() {
- OS << right_justify(formatv("#{0}", frame_no++).str(),
- std::log10(Depth) + 2)
- << ' ' << format_ptr(StackTrace[i]) << ' ';
- };
- if (!Modules[i]) {
- PrintLineHeader();
- OS << '\n';
+ auto *CurLine = Lines.begin();
+ // Lines contains the output from llvm-symbolizer, which should contain for
+ // each address with a module in order of appearance, one or more lines
+ // containing the function name and line associated with that address,
+ // followed by an empty line.
+ // For each address, adds an output entry for every real or inlined frame at
+ // that address. For addresses without known modules, we have a single entry
+ // containing just the formatted address; for all other output entries, we
+ // output the function entry if it is known, and either the line number if it
+ // is known or the module+address offset otherwise.
+ for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) {
+ if (!Modules[AddrIdx]) {
+ auto &SymbolizedFrame =
+ Result.emplace_back(std::make_pair(AddrIdx, ""));
+ raw_string_ostream OS(SymbolizedFrame.second);
+ OS << format_ptr(AddressList[AddrIdx]);
continue;
}
// Read pairs of lines (function name and file/line info) until we
// encounter empty line.
for (;;) {
if (CurLine == Lines.end())
- return false;
+ return {};
StringRef FunctionName = *CurLine++;
if (FunctionName.empty())
break;
- PrintLineHeader();
+ auto &SymbolizedFrame =
+ Result.emplace_back(std::make_pair(AddrIdx, ""));
+ raw_string_ostream OS(SymbolizedFrame.second);
+ OS << format_ptr(AddressList[AddrIdx]) << ' ';
if (!FunctionName.starts_with("??"))
OS << FunctionName << ' ';
if (CurLine == Lines.end())
- return false;
+ return {};
StringRef FileLineInfo = *CurLine++;
if (!FileLineInfo.starts_with("??"))
OS << FileLineInfo;
else
- OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")";
- OS << "\n";
+ OS << "(" << Modules[AddrIdx] << '+' << format_hex(Offsets[AddrIdx], 0) << ")";
}
}
+ return Result;
+}
+
+/// Locates the llvm-symbolizer binary. Tries the program named by the
+/// LLVMSymbolizerPathEnv environment variable if it is set, otherwise the
+/// directory containing \p Argv0 (when non-empty), and finally falls back to
+/// looking up "llvm-symbolizer" by name alone. Returns the error from that
+/// last lookup if nothing was found.
+static ErrorOr<std::string> getLLVMSymbolizerPath(StringRef Argv0 = {}) {
+  ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code();
+  if (const char *Path = getenv(LLVMSymbolizerPathEnv)) {
+    LLVMSymbolizerPathOrErr = sys::findProgramByName(Path);
+  } else if (!Argv0.empty()) {
+    StringRef Parent = llvm::sys::path::parent_path(Argv0);
+    if (!Parent.empty())
+      LLVMSymbolizerPathOrErr =
+          sys::findProgramByName("llvm-symbolizer", Parent);
+  }
+  // Either nothing was tried above or the attempt failed: fall back to a
+  // lookup by bare program name.
+  if (!LLVMSymbolizerPathOrErr)
+    LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer");
+  return LLVMSymbolizerPathOrErr;
+}
+
+/// Helper that launches llvm-symbolizer and symbolizes a backtrace.
+LLVM_ATTRIBUTE_USED
+static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
+ int Depth, llvm::raw_ostream &OS) {
+ if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv))
+ return false;
+
+ // Don't recursively invoke the llvm-symbolizer binary.
+ if (Argv0.contains("llvm-symbolizer"))
+ return false;
+
+ // FIXME: Subtract necessary number from StackTrace entries to turn return addresses
+ // into actual instruction addresses.
+ // Use llvm-symbolizer tool to symbolize the stack traces. First look for it
+ // alongside our binary, then in $PATH.
+ ErrorOr<std::string> LLVMSymbolizerPathOrErr = getLLVMSymbolizerPath(Argv0);
+ if (!LLVMSymbolizerPathOrErr)
+ return false;
+ const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
+
+ // If we don't know argv0 or the address of main() at this point, try
+ // to guess it anyway (it's possible on some platforms).
+ std::string MainExecutableName =
+ sys::fs::exists(Argv0) ? (std::string)std::string(Argv0)
+ : sys::fs::getMainExecutable(nullptr, nullptr);
+
+ auto SymbolizedAddressesOpt = collectAddressSymbols(
+ StackTrace, Depth, MainExecutableName.c_str(), LLVMSymbolizerPath);
+ if (!SymbolizedAddressesOpt)
+ return false;
+ for (unsigned FrameNo = 0; FrameNo < SymbolizedAddressesOpt->size();
+ ++FrameNo) {
+ OS << right_justify(formatv("#{0}", FrameNo).str(), std::log10(Depth) + 2)
+ << ' ' << (*SymbolizedAddressesOpt)[FrameNo].second << '\n';
+ }
return true;
}
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+/// Symbolizes each address in \p Addresses via llvm-symbolizer, appending the
+/// frame strings to its entry in \p SymbolizedAddresses. Fatal error if the
+/// tool cannot be found; the map is left unchanged if symbolization fails.
+void sys::symbolizeAddresses(AddressSet &Addresses,
+                             SymbolizedAddressMap &SymbolizedAddresses) {
+  assert(!DisableSymbolicationFlag && !getenv(DisableSymbolizationEnv) &&
+         "Debugify origin stacktraces require symbolization to be enabled.");
+
+  // Convert Set of Addresses to ordered list.
+  SmallVector<void *, 0> AddressList(Addresses.begin(), Addresses.end());
+  if (AddressList.empty())
+    return;
+  llvm::sort(AddressList);
+
+  // Use llvm-symbolizer to symbolize the addresses. No argv0 is available
+  // here, so the helper is called with its default (empty) Argv0.
+  ErrorOr<std::string> LLVMSymbolizerPathOrErr = getLLVMSymbolizerPath();
+  if (!LLVMSymbolizerPathOrErr)
+    report_fatal_error("Debugify origin stacktraces require llvm-symbolizer");
+  const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
+
+  // Try to guess the main executable name, since we don't have argv0 available
+  // here.
+  std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr);
+
+  auto SymbolizedAddressesOpt = collectAddressSymbols(
+      AddressList.data(), AddressList.size(), MainExecutableName.c_str(),
+      LLVMSymbolizerPath);
+  if (!SymbolizedAddressesOpt)
+    return;
+  // The temporary result is consumed here, so move each frame string into
+  // the map instead of copying it.
+  for (auto &[AddrIdx, Frame] : *SymbolizedAddressesOpt)
+    SymbolizedAddresses[AddressList[AddrIdx]].push_back(std::move(Frame));
+}
+#endif
+
static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName);
LLVM_ATTRIBUTE_USED
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 6668a2953b3b2..a4525a5903649 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -507,6 +507,21 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
return 0;
}
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+#if !defined(HAVE_BACKTRACE)
+#error DebugLoc origin-tracking currently requires `backtrace()`.
+#endif
+namespace llvm {
+namespace sys {
+template <unsigned long MaxDepth>
+int getStackTrace(std::array<void *, MaxDepth> &StackTrace) {
+ return backtrace(StackTrace.data(), MaxDepth);
+}
+template int getStackTrace<16ul>(std::array<void *, 16ul> &);
+} // namespace sys
+} // namespace llvm
+#endif
+
/// If this is an ELF platform, we can find all loaded modules and their virtual
/// addresses with dl_iterate_phdr.
static bool findModulesAndOffsets(void **StackTrace, int Depth,
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index f11ad09f37139..441b66a294e42 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -9,6 +9,7 @@
// This file provides the Win32 specific implementation of the Signals class.
//
//===----------------------------------------------------------------------===//
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ExitCodes.h"
#include "llvm/Support/FileSystem.h"
@@ -542,6 +543,10 @@ void sys::PrintStackTraceOnErrorSignal(StringRef Argv0,
extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord);
#endif
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+#error DebugLoc origin-tracking currently unimplemented for Windows.
+#endif
+
static void LocalPrintStackTrace(raw_ostream &OS, PCONTEXT C) {
STACKFRAME64 StackFrame{};
CONTEXT Context{};
More information about the llvm-commits
mailing list