[llvm] [DLCov] Origin-Tracking: Enable collecting and symbolizing stack traces (PR #143591)

Stephen Tozer via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 20 08:05:00 PDT 2025


https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/143591

>From 46c1770170ceab18ec5576c2fd999981b55e9dd0 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Tue, 10 Jun 2025 19:58:09 +0100
Subject: [PATCH] [DLCov] Origin-Tracking: SymbolizeAddresses

---
 llvm/include/llvm/Support/Signals.h  |  37 +++++
 llvm/lib/Support/Signals.cpp         | 201 ++++++++++++++++++---------
 llvm/lib/Support/Unix/Signals.inc    |  15 ++
 llvm/lib/Support/Windows/Signals.inc |   5 +
 4 files changed, 195 insertions(+), 63 deletions(-)

diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h
index 6ce26acdd458e..18df195bacd81 100644
--- a/llvm/include/llvm/Support/Signals.h
+++ b/llvm/include/llvm/Support/Signals.h
@@ -14,10 +14,25 @@
 #ifndef LLVM_SUPPORT_SIGNALS_H
 #define LLVM_SUPPORT_SIGNALS_H
 
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Compiler.h"
 #include <cstdint>
 #include <string>
 
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include <array>
+namespace llvm {
+// Typedefs used only by the DebugLoc origin-tracking stack-trace code.
+using AddressSet = DenseSet<void *, DenseMapInfo<void *, void>>;
+using SymbolizedAddressMap =
+    DenseMap<void *, SmallVector<std::string, 0>, DenseMapInfo<void *, void>,
+             detail::DenseMapPair<void *, SmallVector<std::string, 0>>>;
+} // namespace llvm
+#endif
+
 namespace llvm {
 class StringRef;
 class raw_ostream;
@@ -57,6 +72,28 @@ LLVM_ABI void DisableSystemDialogsOnCrash();
 ///        specified, the entire frame is printed.
 LLVM_ABI void PrintStackTrace(raw_ostream &OS, int Depth = 0);
 
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+#ifdef NDEBUG
+#error DebugLoc origin-tracking should not be enabled in Release builds.
+#endif
+/// Populates \p StackTrace with a stack trace of the current program, up to
+/// MaxDepth frames. Returns the number of frames captured, which are written
+/// to \p StackTrace starting at index 0. All entries after the returned
+/// depth will be unmodified. NB: This is only intended to be used for
+/// introspection of LLVM by Debugify, will not be enabled in release builds,
+/// and should not be relied on for other purposes.
+template <unsigned long MaxDepth>
+int getStackTrace(std::array<void *, MaxDepth> &StackTrace);
+
+/// Symbolizes each address in \p Addresses and appends the resulting frame
+/// strings to that address's entry in the provided \p SymbolizedAddresses map.
+/// NB: This is only intended to be used for introspection of LLVM by
+/// Debugify, will not be enabled in release builds, and should not be relied
+/// on for other purposes.
+void symbolizeAddresses(AddressSet &Addresses,
+                        SymbolizedAddressMap &SymbolizedAddresses);
+#endif
+
 // Run all registered signal handlers.
 LLVM_ABI void RunSignalHandlers();
 
diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp
index 9f9030e79d104..a9c61f23497a5 100644
--- a/llvm/lib/Support/Signals.cpp
+++ b/llvm/lib/Support/Signals.cpp
@@ -31,7 +31,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include <array>
 #include <cmath>
-#include <vector>
 
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only TRULY operating system
@@ -137,47 +136,28 @@ static FormattedNumber format_ptr(void *PC) {
   return format_hex((uint64_t)PC, PtrWidth);
 }
 
-/// Helper that launches llvm-symbolizer and symbolizes a backtrace.
-LLVM_ATTRIBUTE_USED
-static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
-                                      int Depth, llvm::raw_ostream &OS) {
-  if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv))
-    return false;
-
-  // Don't recursively invoke the llvm-symbolizer binary.
-  if (Argv0.contains("llvm-symbolizer"))
-    return false;
-
-  // FIXME: Subtract necessary number from StackTrace entries to turn return addresses
-  // into actual instruction addresses.
-  // Use llvm-symbolizer tool to symbolize the stack traces. First look for it
-  // alongside our binary, then in $PATH.
-  ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code();
-  if (const char *Path = getenv(LLVMSymbolizerPathEnv)) {
-    LLVMSymbolizerPathOrErr = sys::findProgramByName(Path);
-  } else if (!Argv0.empty()) {
-    StringRef Parent = llvm::sys::path::parent_path(Argv0);
-    if (!Parent.empty())
-      LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer", Parent);
-  }
-  if (!LLVMSymbolizerPathOrErr)
-    LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer");
-  if (!LLVMSymbolizerPathOrErr)
-    return false;
-  const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
-
-  // If we don't know argv0 or the address of main() at this point, try
-  // to guess it anyway (it's possible on some platforms).
-  std::string MainExecutableName =
-      sys::fs::exists(Argv0) ? (std::string)std::string(Argv0)
-                             : sys::fs::getMainExecutable(nullptr, nullptr);
+/// Runs llvm-symbolizer at \p LLVMSymbolizerPath on the addresses in
+/// \p AddressList, reads back the function names and source locations it
+/// writes, and returns the strings in a vector of pairs, where the first pair
+/// element is the index of the corresponding entry in AddressList and the
+/// second is the symbolized frame, in a format based on the sanitizer stack
+/// trace printer, with the exception that it does not write out frame numbers
+/// (i.e. "#2 " for the third address), as it is not assumed that
+/// \p AddressList corresponds to a single stack trace.
+/// There may be multiple returned entries for a single \p AddressList entry if
+/// that frame address corresponds to one or more inlined frames; in this case,
+/// all frames for an address will appear contiguously and in-order.
+std::optional<SmallVector<std::pair<unsigned, std::string>, 0>>
+collectAddressSymbols(void **AddressList, unsigned AddressCount,
+                      const char *MainExecutableName,
+                      const std::string &LLVMSymbolizerPath) {
   BumpPtrAllocator Allocator;
   StringSaver StrPool(Allocator);
-  std::vector<const char *> Modules(Depth, nullptr);
-  std::vector<intptr_t> Offsets(Depth, 0);
-  if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(),
-                             MainExecutableName.c_str(), StrPool))
-    return false;
+  SmallVector<const char *, 0> Modules(AddressCount, nullptr);
+  SmallVector<intptr_t, 0> Offsets(AddressCount, 0);
+  if (!findModulesAndOffsets(AddressList, AddressCount, Modules.data(), Offsets.data(),
+                             MainExecutableName, StrPool))
+    return {};
   int InputFD;
   SmallString<32> InputFile, OutputFile;
   sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile);
@@ -187,9 +167,9 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
 
   {
     raw_fd_ostream Input(InputFD, true);
-    for (int i = 0; i < Depth; i++) {
-      if (Modules[i])
-        Input << Modules[i] << " " << (void*)Offsets[i] << "\n";
+    for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) {
+      if (Modules[AddrIdx])
+        Input << Modules[AddrIdx] << " " << (void*)Offsets[AddrIdx] << "\n";
     }
   }
 
@@ -206,53 +186,148 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
   int RunResult =
       sys::ExecuteAndWait(LLVMSymbolizerPath, Args, std::nullopt, Redirects);
   if (RunResult != 0)
-    return false;
+    return {};
 
-  // This report format is based on the sanitizer stack trace printer.  See
-  // sanitizer_stacktrace_printer.cc in compiler-rt.
+
+  SmallVector<std::pair<unsigned, std::string>, 0> Result;
   auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str());
   if (!OutputBuf)
-    return false;
+    return {};
   StringRef Output = OutputBuf.get()->getBuffer();
   SmallVector<StringRef, 32> Lines;
   Output.split(Lines, "\n");
-  auto CurLine = Lines.begin();
-  int frame_no = 0;
-  for (int i = 0; i < Depth; i++) {
-    auto PrintLineHeader = [&]() {
-      OS << right_justify(formatv("#{0}", frame_no++).str(),
-                          std::log10(Depth) + 2)
-         << ' ' << format_ptr(StackTrace[i]) << ' ';
-    };
-    if (!Modules[i]) {
-      PrintLineHeader();
-      OS << '\n';
+  auto *CurLine = Lines.begin();
+  // Lines contains the output from llvm-symbolizer, which should contain, for
+  // each address with a module and in order of appearance, one or more pairs of
+  // lines (the function name, then the file/line info for that address),
+  // followed by an empty line.
+  // For each address, adds an output entry for every real or inlined frame at
+  // that address. For addresses without known modules, we have a single entry
+  // containing just the formatted address; for all other output entries, we
+  // output the function name if it is known, and either the file/line info if
+  // it is known or the module+address offset otherwise.
+  for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) {
+    if (!Modules[AddrIdx]) {
+      auto &SymbolizedFrame =
+        Result.emplace_back(std::make_pair(AddrIdx, ""));
+      raw_string_ostream OS(SymbolizedFrame.second);
+      OS << format_ptr(AddressList[AddrIdx]);
       continue;
     }
     // Read pairs of lines (function name and file/line info) until we
     // encounter empty line.
     for (;;) {
       if (CurLine == Lines.end())
-        return false;
+        return {};
       StringRef FunctionName = *CurLine++;
       if (FunctionName.empty())
         break;
-      PrintLineHeader();
+      auto &SymbolizedFrame =
+        Result.emplace_back(std::make_pair(AddrIdx, ""));
+      raw_string_ostream OS(SymbolizedFrame.second);
+      OS << format_ptr(AddressList[AddrIdx]) << ' ';
       if (!FunctionName.starts_with("??"))
         OS << FunctionName << ' ';
       if (CurLine == Lines.end())
-        return false;
+        return {};
       StringRef FileLineInfo = *CurLine++;
       if (!FileLineInfo.starts_with("??"))
         OS << FileLineInfo;
       else
-        OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")";
-      OS << "\n";
+        OS << "(" << Modules[AddrIdx] << '+' << format_hex(Offsets[AddrIdx], 0) << ")";
     }
   }
+  return Result;
+}
+
+/// Finds llvm-symbolizer: the LLVMSymbolizerPathEnv program if that variable is
+/// set, else one beside \p Argv0; a failed lookup falls back to a PATH search.
+static ErrorOr<std::string> getLLVMSymbolizerPath(StringRef Argv0 = {}) {
+  ErrorOr<std::string> PathOrErr = std::error_code();
+  StringRef Parent = llvm::sys::path::parent_path(Argv0);
+  if (const char *EnvPath = getenv(LLVMSymbolizerPathEnv))
+    PathOrErr = sys::findProgramByName(EnvPath);
+  else if (!Parent.empty())
+    PathOrErr = sys::findProgramByName("llvm-symbolizer", Parent);
+  if (!PathOrErr)
+    PathOrErr = sys::findProgramByName("llvm-symbolizer");
+  return PathOrErr;
+}
+
+/// Symbolizes \p StackTrace (\p Depth addresses) by running llvm-symbolizer and
+/// prints one numbered line per symbolized frame to \p OS. Returns false,
+/// printing nothing, if symbolization is disabled or could not be performed.
+LLVM_ATTRIBUTE_USED
+static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
+                                      int Depth, llvm::raw_ostream &OS) {
+  // Never symbolize when disabled, and never let llvm-symbolizer invoke itself.
+  if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv) ||
+      Argv0.contains("llvm-symbolizer"))
+    return false;
+
+  // FIXME: Subtract necessary number from StackTrace entries to turn return
+  // addresses into actual instruction addresses.
+  // Locate the llvm-symbolizer tool used to symbolize the stack trace.
+  ErrorOr<std::string> SymbolizerOrErr = getLLVMSymbolizerPath(Argv0);
+  if (!SymbolizerOrErr)
+    return false;
+
+  // If Argv0 does not name an existing file, try to guess the main executable
+  // anyway (it's possible on some platforms).
+  std::string ExeName;
+  if (sys::fs::exists(Argv0))
+    ExeName = std::string(Argv0);
+  else
+    ExeName = sys::fs::getMainExecutable(nullptr, nullptr);
+
+  auto Frames = collectAddressSymbols(StackTrace, Depth, ExeName.c_str(),
+                                      *SymbolizerOrErr);
+  if (!Frames)
+    return false;
+  unsigned FrameNo = 0;
+  for (const auto &Frame : *Frames)
+    OS << right_justify(formatv("#{0}", FrameNo++).str(),
+                        std::log10(Depth) + 2)
+       << ' ' << Frame.second << '\n';
   return true;
 }
 
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+/// Symbolizes every address in \p Addresses with llvm-symbolizer and appends
+/// the resulting frame strings to that address's entry in
+/// \p SymbolizedAddresses. Calls report_fatal_error if llvm-symbolizer cannot
+/// be found; returns without adding entries if collecting the symbols fails.
+void sys::symbolizeAddresses(AddressSet &Addresses,
+                             SymbolizedAddressMap &SymbolizedAddresses) {
+  assert(!DisableSymbolicationFlag && !getenv(DisableSymbolizationEnv) &&
+         "Debugify origin stacktraces require symbolization to be enabled.");
+
+  // Convert Set of Addresses to ordered list.
+  SmallVector<void *, 0> AddressList(Addresses.begin(), Addresses.end());
+  if (AddressList.empty())
+    return;
+  llvm::sort(AddressList);
+
+  // Locate llvm-symbolizer (no Argv0 is available to pass here).
+  ErrorOr<std::string> LLVMSymbolizerPathOrErr = getLLVMSymbolizerPath();
+  if (!LLVMSymbolizerPathOrErr)
+    report_fatal_error("Debugify origin stacktraces require llvm-symbolizer");
+
+  // Try to guess the main executable name, since we don't have argv0 available
+  // here.
+  std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr);
+
+  auto SymbolizedAddressesOpt = collectAddressSymbols(
+      AddressList.data(), AddressList.size(), MainExecutableName.c_str(),
+      *LLVMSymbolizerPathOrErr);
+  if (!SymbolizedAddressesOpt)
+    return;
+  // Bind by reference so each frame string is copied only once, into the map.
+  for (const auto &[AddrIdx, Frame] : *SymbolizedAddressesOpt)
+    SymbolizedAddresses[AddressList[AddrIdx]].push_back(Frame);
+}
+#endif
+
 static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName);
 
 LLVM_ATTRIBUTE_USED
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 6668a2953b3b2..a4525a5903649 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -507,6 +507,21 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
   return 0;
 }
 
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+#if !defined(HAVE_BACKTRACE)
+#error DebugLoc origin-tracking currently requires `backtrace()`.
+#endif
+namespace llvm::sys {
+/// Fills \p StackTrace with up to MaxDepth addresses obtained from backtrace()
+/// and returns the value that backtrace() returns.
+template <unsigned long MaxDepth>
+int getStackTrace(std::array<void *, MaxDepth> &StackTrace) {
+  return backtrace(StackTrace.data(), MaxDepth);
+}
+template int getStackTrace<16ul>(std::array<void *, 16ul> &);
+} // namespace llvm::sys
+#endif
+
 /// If this is an ELF platform, we can find all loaded modules and their virtual
 /// addresses with dl_iterate_phdr.
 static bool findModulesAndOffsets(void **StackTrace, int Depth,
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index f11ad09f37139..441b66a294e42 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -9,6 +9,7 @@
 // This file provides the Win32 specific implementation of the Signals class.
 //
 //===----------------------------------------------------------------------===//
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ExitCodes.h"
 #include "llvm/Support/FileSystem.h"
@@ -542,6 +543,10 @@ void sys::PrintStackTraceOnErrorSignal(StringRef Argv0,
 extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord);
 #endif
 
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+#error DebugLoc origin-tracking currently unimplemented for Windows.
+#endif
+
 static void LocalPrintStackTrace(raw_ostream &OS, PCONTEXT C) {
   STACKFRAME64 StackFrame{};
   CONTEXT Context{};



More information about the llvm-commits mailing list