[llvm] 22b9404 - Optionally print symbolizer markup backtraces.

Daniel Thornburgh via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 17 10:54:54 PDT 2023


Author: Daniel Thornburgh
Date: 2023-08-17T10:54:47-07:00
New Revision: 22b9404f09dc4411e4f6d05f4a1724897e5e131a

URL: https://github.com/llvm/llvm-project/commit/22b9404f09dc4411e4f6d05f4a1724897e5e131a
DIFF: https://github.com/llvm/llvm-project/commit/22b9404f09dc4411e4f6d05f4a1724897e5e131a.diff

LOG: Optionally print symbolizer markup backtraces.

When the environment variable LLVM_ENABLE_SYMBOLIZER_MARKUP is set, if
llvm-symbolizer fails or is disabled, this change will print a backtrace
in llvm-symbolizer markup instead of falling back to in-process
symbolization mechanisms.

This allows llvm-symbolizer to be run on the output later to produce a
high quality backtrace, even for fully-stripped LLVM utilities.

Reviewed By: mcgrathr

Differential Revision: https://reviews.llvm.org/D139750

Added: 
    llvm/unittests/Support/SignalsTest.cpp

Modified: 
    llvm/docs/SymbolizerMarkupFormat.rst
    llvm/lib/Support/Signals.cpp
    llvm/lib/Support/Unix/Signals.inc
    llvm/lib/Support/Windows/Signals.inc
    llvm/unittests/Support/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/docs/SymbolizerMarkupFormat.rst b/llvm/docs/SymbolizerMarkupFormat.rst
index d6b22a409969bd..d5b17d7e2d1d4b 100644
--- a/llvm/docs/SymbolizerMarkupFormat.rst
+++ b/llvm/docs/SymbolizerMarkupFormat.rst
@@ -33,7 +33,9 @@ text. It's specifically intended not to require sanitizing plain text, such as
 the HTML/XML requirement to replace ``<`` with ``&lt;`` and the like.
 
 :doc:`llvm-symbolizer <CommandGuide/llvm-symbolizer>` includes a symbolizing
-filter via its ``--filter-markup`` option.
+filter via its ``--filter-markup`` option. Also, LLVM utilities emit stack
+traces as markup when the ``LLVM_ENABLE_SYMBOLIZER_MARKUP`` environment
+variable is set.
 
 Scope and assumptions
 =====================

diff  --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp
index c681266f03b750..e6f74b801c52b4 100644
--- a/llvm/lib/Support/Signals.cpp
+++ b/llvm/lib/Support/Signals.cpp
@@ -72,6 +72,7 @@ void llvm::initSignalsOptions() {
 
 constexpr char DisableSymbolizationEnv[] = "LLVM_DISABLE_SYMBOLIZATION";
 constexpr char LLVMSymbolizerPathEnv[] = "LLVM_SYMBOLIZER_PATH";
+constexpr char EnableSymbolizerMarkupEnv[] = "LLVM_ENABLE_SYMBOLIZER_MARKUP";
 
 // Callbacks to run in signal handler must be lock-free because a signal handler
 // could be running as we add new callbacks. We don't add unbounded numbers of
@@ -252,6 +253,25 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
   return true;
 }
 
+static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName);
+
+/// Prints the given stack trace as llvm-symbolizer markup when the
+/// LLVM_ENABLE_SYMBOLIZER_MARKUP environment variable is set and non-empty.
+///
+/// \param Argv0 Used as the main executable's name if it exists on disk;
+///        otherwise sys::fs::getMainExecutable() supplies the name.
+/// \param StackTrace Array holding at least \p Depth frame addresses.
+/// \param Depth Number of frames to print.
+/// \param OS Stream that receives the markup.
+/// \returns true if a markup backtrace was printed. Returns false when markup
+///          is not enabled or printMarkupContext() reports failure, so that
+///          the caller can fall back to another mechanism.
+LLVM_ATTRIBUTE_USED
+static bool printMarkupStackTrace(StringRef Argv0, void **StackTrace, int Depth,
+                                  raw_ostream &OS) {
+  const char *Env = getenv(EnableSymbolizerMarkupEnv);
+  if (!Env || !*Env)
+    return false;
+
+  std::string MainExecutableName =
+      sys::fs::exists(Argv0) ? std::string(Argv0)
+                             : sys::fs::getMainExecutable(nullptr, nullptr);
+  if (!printMarkupContext(OS, MainExecutableName.c_str()))
+    return false;
+  for (int I = 0; I < Depth; I++) {
+    // The argument type has to match the printf-style conversion exactly. A
+    // raw pointer handed to %x would be read as an unsigned int, losing the
+    // upper half of the address on 64-bit targets, so convert explicitly.
+    const auto Address = static_cast<unsigned long long>(
+        reinterpret_cast<uintptr_t>(StackTrace[I]));
+    OS << format("{{{bt:%d:%#016llx}}}\n", I, Address);
+  }
+  return true;
+}
+
 // Include the platform-specific parts of this class.
 #ifdef LLVM_ON_UNIX
 #include "Unix/Signals.inc"

diff  --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index fcf5701afcfd42..529cc92a28490b 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -510,6 +510,117 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
   dl_iterate_phdr(dl_iterate_phdr_cb, &data);
   return true;
 }
+
+/// Emits llvm-symbolizer markup ("module" and "mmap" elements) describing the
+/// ELF objects that dl_iterate_phdr reports for the current process.
+class DSOMarkupPrinter {
+  llvm::raw_ostream &OS;
+  const char *MainExecutableName;
+  // Markup ID that will be given to the next module that is printed.
+  size_t ModuleCount = 0;
+  // True until the first dl_iterate_phdr entry has been visited.
+  bool IsFirst = true;
+
+public:
+  DSOMarkupPrinter(llvm::raw_ostream &OS, const char *MainExecutableName)
+      : OS(OS), MainExecutableName(MainExecutableName) {}
+
+  /// Print llvm-symbolizer markup describing the layout of the given DSO.
+  /// DSOs for which no build ID can be found are skipped entirely.
+  void printDSOMarkup(dl_phdr_info *Info) {
+    // Only the very first entry visited is named after the main executable
+    // (dl_iterate_phdr is expected to report the main program first). Consume
+    // the flag before any early return so that a main executable without a
+    // build ID does not cause the next DSO to be mislabelled.
+    const bool IsMainExecutable = IsFirst;
+    IsFirst = false;
+
+    ArrayRef<uint8_t> BuildID = findBuildID(Info);
+    if (BuildID.empty())
+      return;
+    OS << format("{{{module:%zu:%s:elf:", ModuleCount,
+                 IsMainExecutable ? MainExecutableName : Info->dlpi_name);
+    for (uint8_t X : BuildID)
+      OS << format("%02x", X);
+    OS << "}}}\n";
+
+    for (int I = 0; I < Info->dlpi_phnum; I++) {
+      const auto *Phdr = &Info->dlpi_phdr[I];
+      if (Phdr->p_type != PT_LOAD)
+        continue;
+      uintptr_t StartAddress = Info->dlpi_addr + Phdr->p_vaddr;
+      uintptr_t ModuleRelativeAddress = Phdr->p_vaddr;
+      std::array<char, 4> ModeStr = modeStrFromFlags(Phdr->p_flags);
+      // Widen every integer to the exact type named by its conversion
+      // specifier; passing pointer-sized values for %x or %d would truncate
+      // them on 64-bit targets.
+      OS << format("{{{mmap:%#016llx:%#llx:load:%zu:%s:%#016llx}}}\n",
+                   static_cast<unsigned long long>(StartAddress),
+                   static_cast<unsigned long long>(Phdr->p_memsz), ModuleCount,
+                   &ModeStr[0],
+                   static_cast<unsigned long long>(ModuleRelativeAddress));
+    }
+    ModuleCount++;
+  }
+
+  /// Callback for use with dl_iterate_phdr. The last dl_iterate_phdr argument
+  /// must be a pointer to an instance of this class. Always returns 0 so that
+  /// iteration continues over every loaded object.
+  static int printDSOMarkup(dl_phdr_info *Info, size_t Size, void *Arg) {
+    static_cast<DSOMarkupPrinter *>(Arg)->printDSOMarkup(Info);
+    return 0;
+  }
+
+  // Returns the build ID for the given DSO as an array of bytes. Returns an
+  // empty array if none could be found. The returned array refers directly to
+  // the note data in the loaded image.
+  ArrayRef<uint8_t> findBuildID(dl_phdr_info *Info) {
+    for (int I = 0; I < Info->dlpi_phnum; I++) {
+      const auto *Phdr = &Info->dlpi_phdr[I];
+      if (Phdr->p_type != PT_NOTE)
+        continue;
+
+      ArrayRef<uint8_t> Notes(
+          reinterpret_cast<const uint8_t *>(Info->dlpi_addr + Phdr->p_vaddr),
+          Phdr->p_memsz);
+      // Each note starts with three 32-bit words (name size, descriptor size,
+      // type), followed by the name and the descriptor, each padded to a
+      // 4-byte boundary.
+      while (Notes.size() > 12) {
+        uint32_t NameSize = *reinterpret_cast<const uint32_t *>(Notes.data());
+        Notes = Notes.drop_front(4);
+        uint32_t DescSize = *reinterpret_cast<const uint32_t *>(Notes.data());
+        Notes = Notes.drop_front(4);
+        uint32_t Type = *reinterpret_cast<const uint32_t *>(Notes.data());
+        Notes = Notes.drop_front(4);
+
+        ArrayRef<uint8_t> Name = Notes.take_front(NameSize);
+        auto CurPos = reinterpret_cast<uintptr_t>(Notes.data());
+        uint32_t BytesUntilDesc =
+            alignToPowerOf2(CurPos + NameSize, 4) - CurPos;
+        // Stop on a note whose name would run past the end of the segment.
+        if (BytesUntilDesc >= Notes.size())
+          break;
+        Notes = Notes.drop_front(BytesUntilDesc);
+
+        ArrayRef<uint8_t> Desc = Notes.take_front(DescSize);
+        CurPos = reinterpret_cast<uintptr_t>(Notes.data());
+        uint32_t BytesUntilNextNote =
+            alignToPowerOf2(CurPos + DescSize, 4) - CurPos;
+        // Likewise stop if the descriptor would run past the end.
+        if (BytesUntilNextNote > Notes.size())
+          break;
+        Notes = Notes.drop_front(BytesUntilNextNote);
+
+        if (Type == 3 /*NT_GNU_BUILD_ID*/ && Name.size() >= 3 &&
+            Name[0] == 'G' && Name[1] == 'N' && Name[2] == 'U')
+          return Desc;
+      }
+    }
+    return {};
+  }
+
+  // Returns a symbolizer markup string describing the permissions on a DSO
+  // with the given p_flags: some subset of "rwx", NUL-terminated.
+  std::array<char, 4> modeStrFromFlags(uint32_t Flags) {
+    std::array<char, 4> Mode;
+    char *Cur = &Mode[0];
+    if (Flags & PF_R)
+      *Cur++ = 'r';
+    if (Flags & PF_W)
+      *Cur++ = 'w';
+    if (Flags & PF_X)
+      *Cur++ = 'x';
+    *Cur = '\0';
+    return Mode;
+  }
+};
+
+/// Writes the markup "reset" element to \p OS, then lets a DSOMarkupPrinter
+/// describe every object visited by dl_iterate_phdr. \p MainExecutableName is
+/// forwarded to the printer. Always returns true.
+static bool printMarkupContext(llvm::raw_ostream &OS,
+                               const char *MainExecutableName) {
+  OS << "{{{reset}}}\n";
+
+  DSOMarkupPrinter Printer(OS, MainExecutableName);
+  dl_iterate_phdr(DSOMarkupPrinter::printDSOMarkup, &Printer);
+
+  return true;
+}
+
 #elif ENABLE_BACKTRACES && defined(__APPLE__) && defined(__LP64__)
 static bool findModulesAndOffsets(void **StackTrace, int Depth,
                                   const char **Modules, intptr_t *Offsets,
@@ -544,6 +655,7 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
   }
   return true;
 }
+
 #else
 /// Backtraces are not enabled or we don't yet know how to find all loaded DSOs
 /// on this platform.
@@ -553,6 +665,11 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
                                   StringSaver &StrPool) {
   return false;
 }
+
+static bool printMarkupContext(llvm::raw_ostream &OS,
+                               const char *MainExecutableName) {
+  return false; // No markup context can be produced in this configuration.
+}
 #endif // ENABLE_BACKTRACES && ... (findModulesAndOffsets variants)
 
 #if ENABLE_BACKTRACES && defined(HAVE__UNWIND_BACKTRACE)
@@ -613,6 +730,8 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
   // backtrace() for printing a symbolized stack trace.
   if (!Depth)
     Depth = depth;
+  if (printMarkupStackTrace(Argv0, StackTrace, Depth, OS))
+    return;
   if (printSymbolizedStackTrace(Argv0, StackTrace, Depth, OS))
     return;
   OS << "Stack dump without symbol names (ensure you have llvm-symbolizer in "

diff  --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index cb82f55fc38bec..34635b5aba7a1b 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -302,6 +302,11 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
   return true;
 }
 
+static bool printMarkupContext(llvm::raw_ostream &OS,
+                               const char *MainExecutableName) {
+  return false; // No markup context is produced by the Windows implementation.
+}
+
 static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
                                      HANDLE hThread, STACKFRAME64 &StackFrame,
                                      CONTEXT *Context) {

diff  --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
index 92aca0010186ba..12f2e995932604 100644
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -74,6 +74,7 @@ add_llvm_unittest(SupportTests
   ScaledNumberTest.cpp
   ScopedPrinterTest.cpp
   SHA256.cpp
+  SignalsTest.cpp
   SourceMgrTest.cpp
   SpecialCaseListTest.cpp
   SuffixTreeTest.cpp

diff  --git a/llvm/unittests/Support/SignalsTest.cpp b/llvm/unittests/Support/SignalsTest.cpp
new file mode 100644
index 00000000000000..beca8f65304dc8
--- /dev/null
+++ b/llvm/unittests/Support/SignalsTest.cpp
@@ -0,0 +1,65 @@
+//===-- llvm/unittest/Support/SignalsTest.cpp - Signals unit tests --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains unit tests for Signals.cpp and Signals.inc.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Signals.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Config/config.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest-matchers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::sys;
+using testing::MatchesRegex;
+using testing::Not;
+
+#define TAG_BEGIN "\\{\\{\\{"
+#define TAG_END "\\}\\}\\}"
+
+#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && HAVE_LINK_H &&             \
+    (defined(__linux__) || defined(__FreeBSD__) ||                             \
+     defined(__FreeBSD_kernel__) || defined(__NetBSD__))
+// Checks that PrintStackTrace emits the reset, module, mmap and bt markup
+// elements when LLVM_ENABLE_SYMBOLIZER_MARKUP is set.
+TEST(SignalsTest, PrintsSymbolizerMarkup) {
+  auto Exit =
+      make_scope_exit([]() { unsetenv("LLVM_ENABLE_SYMBOLIZER_MARKUP"); });
+  setenv("LLVM_ENABLE_SYMBOLIZER_MARKUP", "1", 1);
+  std::string Res;
+  raw_string_ostream RawStream(Res);
+  PrintStackTrace(RawStream);
+  EXPECT_THAT(Res, MatchesRegex(TAG_BEGIN "reset" TAG_END ".*"));
+  // Module line for main binary
+  EXPECT_THAT(Res,
+              MatchesRegex(".*" TAG_BEGIN
+                           "module:0:[^:]*SupportTests:elf:[0-9a-f]+" TAG_END
+                           ".*"));
+  // Text segment for main binary
+  EXPECT_THAT(
+      Res,
+      MatchesRegex(".*" TAG_BEGIN
+                   "mmap:0x[0-9a-f]+:0x[0-9a-f]+:load:0:rx:0x[0-9a-f]+" TAG_END
+                   ".*"));
+  // Backtrace line. Require the closing tag as for the other elements, so
+  // that an unterminated bt element is not accepted.
+  EXPECT_THAT(Res, MatchesRegex(".*" TAG_BEGIN "bt:0:0x[0-9a-f]+" TAG_END
+                                ".*"));
+}
+
+// With LLVM_DISABLE_SYMBOLIZATION set (and the markup variable not set by
+// this test), the printed stack trace must not begin with a markup reset
+// element.
+TEST(SignalsTest, SymbolizerMarkupDisabled) {
+  auto RestoreEnv =
+      make_scope_exit([] { unsetenv("LLVM_DISABLE_SYMBOLIZATION"); });
+  setenv("LLVM_DISABLE_SYMBOLIZATION", "1", 1);
+
+  std::string Output;
+  raw_string_ostream Stream(Output);
+  PrintStackTrace(Stream);
+
+  EXPECT_THAT(Output, Not(MatchesRegex(TAG_BEGIN "reset" TAG_END ".*")));
+}
+
+#endif // defined(HAVE_BACKTRACE) && ...


        


More information about the llvm-commits mailing list