[compiler-rt] [sanitizer_symbolizer] Symbolizer Markup for linux. (PR #65543)

Andres Villegas via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 6 15:34:48 PDT 2023


https://github.com/avillega created https://github.com/llvm/llvm-project/pull/65543:

This change adds support for sanitizer symbolizer markup for linux. For more informationa about symbolzier markup please visit https://llvm.org/docs/SymbolizerMarkupFormat.html

>From 1c93f8a2f3e2f14e8d1430c542e946a287eade52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Villegas?= <andresvi at google.com>
Date: Thu, 10 Aug 2023 18:19:46 +0000
Subject: [PATCH] [sanitizer] Symbolizer Markup for linux.

This change adds support for sanitizer symbolizer markup
for linux. For more informationa about symbolzier markup
please visit https://llvm.org/docs/SymbolizerMarkupFormat.html
---
 compiler-rt/lib/hwasan/hwasan_report.cpp      |  44 ++++-
 .../lib/sanitizer_common/CMakeLists.txt       |   1 +
 .../lib/sanitizer_common/sanitizer_flags.inc  |   3 +
 .../sanitizer_stacktrace_libcdep.cpp          |  64 +++++-
 .../sanitizer_stacktrace_printer.cpp          |  31 ++-
 .../sanitizer_stacktrace_printer.h            |  10 +-
 .../sanitizer_common/sanitizer_symbolizer.h   |   5 +-
 .../sanitizer_symbolizer_internal.h           |   1 -
 .../sanitizer_symbolizer_libcdep.cpp          |  12 ++
 .../sanitizer_symbolizer_markup.cpp           | 186 +++++++++++++++---
 .../sanitizer_symbolizer_markup.h             |  69 +++++++
 .../sanitizer_symbolizer_posix_libcdep.cpp    |   7 +
 .../sanitizer_symbolizer_report.cpp           |   1 +
 .../sanitizer_stacktrace_printer_test.cpp     |  36 ++--
 compiler-rt/lib/tsan/rtl/tsan_report.cpp      |  10 +
 15 files changed, 413 insertions(+), 67 deletions(-)
 create mode 100644 compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.h

diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp
index efe6f57704919a4..9e8fe03510213af 100644
--- a/compiler-rt/lib/hwasan/hwasan_report.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_report.cpp
@@ -248,6 +248,44 @@ static void PrintStackAllocations(StackAllocationsRingBuffer *sa,
     if (SymbolizedStack *frame = Symbolizer::GetOrInit()->SymbolizePC(pc)) {
       RenderFrame(&frame_desc, " %F %L", 0, frame->info.address, &frame->info,
                   common_flags()->symbolize_vs_style,
+                  common_flags()->enable_symbolizer_markup,
+                  common_flags()->strip_path_prefix);
+      frame->ClearAll();
+    }
+    Printf("%s\n", frame_desc.data());
+    frame_desc.clear();
+  }
+}
+
+
+static void PrintStackAllocationsMarkup(StackAllocationsRingBuffer *sa) {
+  // For symbolizer markup it is just necessary to have dump stack
+  // frames for offline symbolization.
+
+  uptr frames = Min((uptr)flags()->stack_history_size, sa->size());
+  if (const ListOfModules *modules =
+          Symbolizer::GetOrInit()->GetRefreshedListOfModules()) {
+    InternalScopedString modules_res;
+    RenderModules(&modules_res, modules,
+                  /*symbolizer_markup=*/ true);
+    Printf("%s", modules_res.data());
+  }
+
+  InternalScopedString frame_desc;
+  Printf("Previously allocated frames:\n");
+  for (uptr i = 0; i < frames; i++) {
+    const uptr *record_addr = &(*sa)[i];
+    uptr record = *record_addr;
+    if (!record)
+      break;
+    uptr pc_mask = (1ULL << 48) - 1;
+    uptr pc = record & pc_mask;
+    frame_desc.append("  record_addr:0x%zx record:0x%zx",
+                      reinterpret_cast<uptr>(record_addr), record);
+    if (SymbolizedStack *frame = Symbolizer::GetOrInit()->SymbolizePC(pc)) {
+      RenderFrame(&frame_desc, "", 0, frame->info.address, &frame->info,
+                  common_flags()->symbolize_vs_style,
+                  /*symbolizer_markup=*/ true,
                   common_flags()->strip_path_prefix);
       frame->ClearAll();
     }
@@ -425,7 +463,11 @@ void PrintAddressDescription(
       auto *sa = (t == GetCurrentThread() && current_stack_allocations)
                      ? current_stack_allocations
                      : t->stack_allocations();
-      PrintStackAllocations(sa, addr_tag, untagged_addr);
+      if (common_flags()->enable_symbolizer_markup) {
+        PrintStackAllocations(sa, addr_tag, untagged_addr);
+      } else {
+        PrintStackAllocationsMarkup(sa);
+      }
       num_descriptions_printed++;
     }
   });
diff --git a/compiler-rt/lib/sanitizer_common/CMakeLists.txt b/compiler-rt/lib/sanitizer_common/CMakeLists.txt
index d8517f3de06a493..de398edea32753b 100644
--- a/compiler-rt/lib/sanitizer_common/CMakeLists.txt
+++ b/compiler-rt/lib/sanitizer_common/CMakeLists.txt
@@ -191,6 +191,7 @@ set(SANITIZER_IMPL_HEADERS
   sanitizer_symbolizer_internal.h
   sanitizer_symbolizer_libbacktrace.h
   sanitizer_symbolizer_mac.h
+  sanitizer_symbolizer_markup.h
   sanitizer_syscall_generic.inc
   sanitizer_syscall_linux_aarch64.inc
   sanitizer_syscall_linux_arm.inc
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
index 6148ae56067cae0..a83c2fafe48a6e6 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
@@ -269,3 +269,6 @@ COMMON_FLAG(bool, detect_write_exec, false,
 COMMON_FLAG(bool, test_only_emulate_no_memorymap, false,
             "TEST ONLY fail to read memory mappings to emulate sanitized "
             "\"init\"")
+COMMON_FLAG(bool, enable_symbolizer_markup, false,
+            "Use sanitizer symbolizer "
+            "markup, available only on Linux and Fuchsia.")
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp
index 47983ee7ec713f2..6a6da22c2e3e12e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp
@@ -11,10 +11,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common.h"
+#include "sanitizer_flags.h"
 #include "sanitizer_placement_new.h"
 #include "sanitizer_stacktrace.h"
 #include "sanitizer_stacktrace_printer.h"
 #include "sanitizer_symbolizer.h"
+#include "sanitizer_symbolizer_markup.h"
 
 namespace __sanitizer {
 
@@ -26,10 +28,10 @@ class StackTraceTextPrinter {
                         InternalScopedString *output,
                         InternalScopedString *dedup_token)
       : stack_trace_fmt_(stack_trace_fmt),
-        frame_delimiter_(frame_delimiter),
-        output_(output),
         dedup_token_(dedup_token),
-        symbolize_(RenderNeedsSymbolization(stack_trace_fmt)) {}
+        symbolize_(RenderNeedsSymbolization(stack_trace_fmt, false)),
+        frame_delimiter_(frame_delimiter),
+        output_(output) {}
 
   bool ProcessAddressFrames(uptr pc) {
     SymbolizedStack *frames = symbolize_
@@ -43,6 +45,7 @@ class StackTraceTextPrinter {
       RenderFrame(output_, stack_trace_fmt_, frame_num_++, cur->info.address,
                   symbolize_ ? &cur->info : nullptr,
                   common_flags()->symbolize_vs_style,
+                  common_flags()->enable_symbolizer_markup,
                   common_flags()->strip_path_prefix);
 
       if (prev_len != output_->length())
@@ -69,12 +72,45 @@ class StackTraceTextPrinter {
   }
 
   const char *stack_trace_fmt_;
-  const char frame_delimiter_;
   int dedup_frames_ = common_flags()->dedup_token_length;
-  uptr frame_num_ = 0;
-  InternalScopedString *output_;
   InternalScopedString *dedup_token_;
   const bool symbolize_ = false;
+
+ protected:
+  uptr frame_num_ = 0;
+  const char frame_delimiter_;
+  InternalScopedString *output_;
+};
+
+class StackTraceMarkupPrinter : public StackTraceTextPrinter {
+ public:
+  StackTraceMarkupPrinter(InternalScopedString *output)
+      : StackTraceTextPrinter("", '\n', output, nullptr){};
+
+  bool ProcessAddressFrames(uptr pc) {
+    SymbolizedStack *frames = Symbolizer::GetOrInit()->SymbolizePC(pc);
+
+    if (!frames)
+      return false;
+
+    if (const ListOfModules *modules =
+            Symbolizer::GetOrInit()->GetRefreshedListOfModules()) {
+      RenderModulesMarkup(output_, modules);
+    }
+
+    for (SymbolizedStack *cur = frames; cur; cur = cur->next) {
+      uptr prev_len = output_->length();
+      RenderFrame(output_, "", frame_num_++, cur->info.address, nullptr,
+                  common_flags()->symbolize_vs_style,
+                  /*symbolizer_markup=*/true,
+                  common_flags()->strip_path_prefix);
+
+      if (prev_len != output_->length())
+        output_->append("%c", frame_delimiter_);
+    }
+    frames->ClearAll();
+    return true;
+  }
 };
 
 static void CopyStringToBuffer(const InternalScopedString &str, char *out_buf,
@@ -94,8 +130,11 @@ void StackTrace::PrintTo(InternalScopedString *output) const {
   CHECK(output);
 
   InternalScopedString dedup_token;
-  StackTraceTextPrinter printer(common_flags()->stack_trace_format, '\n',
-                                output, &dedup_token);
+  StackTraceTextPrinter printer =
+      common_flags()->enable_symbolizer_markup
+          ? StackTraceMarkupPrinter(output)
+          : StackTraceTextPrinter(common_flags()->stack_trace_format, '\n',
+                                  output, &dedup_token);
 
   if (trace == nullptr || size == 0) {
     output->append("    <empty stack>\n\n");
@@ -194,7 +233,11 @@ void __sanitizer_symbolize_pc(uptr pc, const char *fmt, char *out_buf,
   pc = StackTrace::GetPreviousInstructionPc(pc);
 
   InternalScopedString output;
-  StackTraceTextPrinter printer(fmt, '\0', &output, nullptr);
+  StackTraceTextPrinter printer =
+      common_flags()->enable_symbolizer_markup
+          ? StackTraceMarkupPrinter(&output)
+          : StackTraceTextPrinter(fmt, '\0', &output, nullptr);
+
   if (!printer.ProcessAddressFrames(pc)) {
     output.clear();
     output.append("<can't symbolize>");
@@ -210,7 +253,8 @@ void __sanitizer_symbolize_global(uptr data_addr, const char *fmt,
   DataInfo DI;
   if (!Symbolizer::GetOrInit()->SymbolizeData(data_addr, &DI)) return;
   InternalScopedString data_desc;
-  RenderData(&data_desc, fmt, &DI, common_flags()->strip_path_prefix);
+  RenderData(&data_desc, fmt, &DI, common_flags()->enable_symbolizer_markup,
+             common_flags()->strip_path_prefix);
   internal_strncpy(out_buf, data_desc.data(), out_buf_size);
   out_buf[out_buf_size - 1] = 0;
 }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
index 45c480d225c7f5a..b29e688b214c6ed 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
@@ -14,7 +14,7 @@
 
 #include "sanitizer_file.h"
 #include "sanitizer_flags.h"
-#include "sanitizer_fuchsia.h"
+#include "sanitizer_symbolizer_markup.h"
 
 namespace __sanitizer {
 
@@ -143,7 +143,12 @@ static const char kDefaultFormat[] = "    #%n %p %F %L";
 
 void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
                  uptr address, const AddressInfo *info, bool vs_style,
-                 const char *strip_path_prefix) {
+                 bool symbolizer_markup, const char *strip_path_prefix) {
+  if (symbolizer_markup) {
+    RenderFrameMarkup(buffer, format, frame_no, address, info, vs_style,
+                      strip_path_prefix);
+    return;
+  }
   // info will be null in the case where symbolization is not needed for the
   // given format. This ensures that the code below will get a hard failure
   // rather than print incorrect information in case RenderNeedsSymbolization
@@ -250,7 +255,11 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
   }
 }
 
-bool RenderNeedsSymbolization(const char *format) {
+bool RenderNeedsSymbolization(const char *format, bool symbolizer_markup) {
+  if (symbolizer_markup) {
+    // Online symbolization is never needed for symbolizer markup.
+    return false;
+  }
   if (0 == internal_strcmp(format, "DEFAULT"))
     format = kDefaultFormat;
   for (const char *p = format; *p != '\0'; p++) {
@@ -274,7 +283,12 @@ bool RenderNeedsSymbolization(const char *format) {
 }
 
 void RenderData(InternalScopedString *buffer, const char *format,
-                const DataInfo *DI, const char *strip_path_prefix) {
+                const DataInfo *DI, bool symbolizer_markup,
+                const char *strip_path_prefix) {
+  if (symbolizer_markup) {
+    RenderDataMarkup(buffer, format, DI, strip_path_prefix);
+    return;
+  }
   for (const char *p = format; *p != '\0'; p++) {
     if (*p != '%') {
       buffer->append("%c", *p);
@@ -333,4 +347,13 @@ void RenderModuleLocation(InternalScopedString *buffer, const char *module,
   buffer->append("+0x%zx)", offset);
 }
 
+void RenderModules(InternalScopedString *buffer, const ListOfModules *modules,
+                   bool symbolizer_markup) {
+  // Rendering all the modules is only needed for symbolizer markup
+  if (!symbolizer_markup)
+    return;
+
+  RenderModulesMarkup(buffer, modules);
+}
+
 } // namespace __sanitizer
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h
index bf2755a2e8f458d..f514feada1f59be 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h
@@ -50,9 +50,9 @@ const char *StripFunctionName(const char *function);
 //   %M - prints module basename and offset, if it is known, or PC.
 void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
                  uptr address, const AddressInfo *info, bool vs_style,
-                 const char *strip_path_prefix = "");
+                 bool symbolizer_markup, const char *strip_path_prefix = "");
 
-bool RenderNeedsSymbolization(const char *format);
+bool RenderNeedsSymbolization(const char *format, bool symbolizer_markup);
 
 void RenderSourceLocation(InternalScopedString *buffer, const char *file,
                           int line, int column, bool vs_style,
@@ -67,7 +67,11 @@ void RenderModuleLocation(InternalScopedString *buffer, const char *module,
 // Also accepts:
 //   %g - name of the global variable.
 void RenderData(InternalScopedString *buffer, const char *format,
-                const DataInfo *DI, const char *strip_path_prefix = "");
+                const DataInfo *DI, bool symbolizer_markup,
+                const char *strip_path_prefix = "");
+
+void RenderModules(InternalScopedString *buffer, const ListOfModules *modules,
+                   bool symbolizer_markup);
 
 }  // namespace __sanitizer
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
index df24aaca3abf645..4006b69291df7bc 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
@@ -20,7 +20,6 @@
 
 #include "sanitizer_common.h"
 #include "sanitizer_mutex.h"
-#include "sanitizer_vector.h"
 
 namespace __sanitizer {
 
@@ -154,6 +153,10 @@ class Symbolizer final {
 
   void InvalidateModuleList();
 
+  // returns the list of modules if it was refreshed
+  // otherwise returns nullptr.
+  const ListOfModules *GetRefreshedListOfModules();
+
  private:
   // GetModuleNameAndOffsetForPC has to return a string to the caller.
   // Since the corresponding module might get unloaded later, we should create
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
index 3ec4d80105a245a..fce98bfabdbeb0c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
@@ -15,7 +15,6 @@
 
 #include "sanitizer_file.h"
 #include "sanitizer_symbolizer.h"
-#include "sanitizer_vector.h"
 
 namespace __sanitizer {
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
index a6f82ced2036731..aa5523ce36949a6 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
@@ -11,9 +11,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_allocator_internal.h"
+#include "sanitizer_common.h"
+#include "sanitizer_flags.h"
 #include "sanitizer_internal_defs.h"
 #include "sanitizer_platform.h"
 #include "sanitizer_symbolizer_internal.h"
+#include "sanitizer_symbolizer_markup.h"
 
 namespace __sanitizer {
 
@@ -188,6 +191,15 @@ void Symbolizer::RefreshModules() {
   modules_fresh_ = true;
 }
 
+const ListOfModules *Symbolizer::GetRefreshedListOfModules() {
+  if (!modules_fresh_) {
+    RefreshModules();
+  }
+
+  CHECK(modules_fresh_);
+  return &modules_;
+}
+
 static const LoadedModule *SearchForModule(const ListOfModules &modules,
                                            uptr address) {
   for (uptr i = 0; i < modules.size(); i++) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
index c8c10de10d03a24..a96bcb0e88b31c4 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
@@ -11,20 +11,140 @@
 // Implementation of offline markup symbolizer.
 //===----------------------------------------------------------------------===//
 
-#include "sanitizer_platform.h"
-#if SANITIZER_SYMBOLIZER_MARKUP
+#include "sanitizer_symbolizer_markup.h"
 
-#if SANITIZER_FUCHSIA
+#include "sanitizer_common.h"
+#include "sanitizer_libc.h"
+#include "sanitizer_platform.h"
+#include "sanitizer_stacktrace.h"
+#include "sanitizer_symbolizer.h"
 #include "sanitizer_symbolizer_fuchsia.h"
-#  endif
 
-#  include <limits.h>
-#  include <unwind.h>
+namespace __sanitizer {
 
-#  include "sanitizer_stacktrace.h"
-#  include "sanitizer_symbolizer.h"
+void RenderDataMarkup(InternalScopedString *buffer, const char *format,
+                      const DataInfo *DI, const char *strip_path_prefix) {
+  buffer->append(kFormatData, DI->start);
+}
 
-namespace __sanitizer {
+bool RenderNeedsSymbolizationMarkup(const char *format) { return false; }
+
+void RenderFrameMarkup(InternalScopedString *buffer, const char *format,
+                       int frame_no, uptr address, const AddressInfo *info,
+                       bool vs_style, const char *strip_path_prefix) {
+  CHECK(!RenderNeedsSymbolizationMarkup(format));
+  buffer->append(kFormatFrame, frame_no, address);
+}
+
+// Simplier view of a LoadedModule. It only holds information necessary to
+// identify unique modules.
+struct RenderedModule {
+  char *full_name;
+  u8 uuid[kModuleUUIDSize];  // BuildId
+  uptr base_address;
+};
+
+bool ModulesEq(const LoadedModule *module,
+               const RenderedModule *renderedModule) {
+  return module->base_address() == renderedModule->base_address &&
+         internal_memcmp(module->uuid(), renderedModule->uuid,
+                         module->uuid_size()) == 0 &&
+         internal_strcmp(module->full_name(), renderedModule->full_name) == 0;
+}
+
+bool ModuleHasBeenRendered(
+    const LoadedModule *module,
+    const InternalMmapVectorNoCtor<RenderedModule> *renderedModules) {
+  for (auto *it = renderedModules->begin(); it != renderedModules->end();
+       ++it) {
+    const auto &renderedModule = *it;
+    if (ModulesEq(module, &renderedModule)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void RenderModulesMarkup(InternalScopedString *buffer,
+                         const ListOfModules *modules) {
+  // Keeps track of the modules that have been rendered.
+  static bool initialized = false;
+  static InternalMmapVectorNoCtor<RenderedModule> renderedModules;
+  if (!initialized) {
+    renderedModules.Initialize(modules->size());
+    initialized = true;
+  }
+
+  if (!renderedModules.size()) {
+    buffer->append("{{{reset}}}\n");
+  }
+
+  for (auto *moduleIt = modules->begin(); moduleIt != modules->end();
+       ++moduleIt) {
+    const LoadedModule &module = *moduleIt;
+
+    if (ModuleHasBeenRendered(&module, &renderedModules)) {
+      continue;
+    }
+
+    buffer->append("{{{module:%d:%s:elf:", renderedModules.size(),
+                   module.full_name());
+    for (uptr i = 0; i < module.uuid_size(); i++) {
+      buffer->append("%02x", module.uuid()[i]);
+    }
+    buffer->append("}}}\n");
+
+    const auto ranges = module.ranges();
+    for (const auto &range : ranges) {
+      buffer->append("{{{mmap:%p:%p:load:%d:r", range.beg,
+                     range.end - range.beg, renderedModules.size());
+      if (range.writable)
+        buffer->append("w");
+      if (range.executable)
+        buffer->append("x");
+
+      // module.base_address = dlpi_addr
+      // range.beg = dlpi_addr + p_vaddr
+      // relative address = p_vaddr = range.beg - module.base_address
+      buffer->append(":%p}}}\n", range.beg - module.base_address());
+    }
+
+    renderedModules.push_back({});
+    RenderedModule &curModule = renderedModules.back();
+    curModule.full_name = internal_strdup(module.full_name());
+    internal_memcpy(curModule.uuid, module.uuid(), module.uuid_size());
+    curModule.base_address = module.base_address();
+  }
+}
+
+MarkupSymbolizer *MarkupSymbolizer::get(LowLevelAllocator *alloc) {
+  return new (*alloc) MarkupSymbolizer();
+}
+
+bool MarkupSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
+  char buffer[kFormatFunctionMax];
+  internal_snprintf(buffer, sizeof(buffer), kFormatFunction, addr);
+  stack->info.function = internal_strdup(buffer);
+  return true;
+}
+
+bool MarkupSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
+  info->Clear();
+  info->start = addr;
+  return true;
+}
+
+// This is used by UBSan for type names, and by ASan for global variable names.
+// It's expected to return a static buffer that will be reused on each call.
+const char *MarkupSymbolizer::Demangle(const char *name) {
+  static char buffer[kFormatDemangleMax];
+  internal_snprintf(buffer, sizeof(buffer), kFormatDemangle, name);
+  return buffer;
+}
+
+#if SANITIZER_SYMBOLIZER_MARKUP
+#  include <limits.h>
+#  include <unwind.h>
 
 // This generic support for offline symbolizing is based on the
 // Fuchsia port.  We don't do any actual symbolization per se.
@@ -38,9 +158,9 @@ namespace __sanitizer {
 // This is used by UBSan for type names, and by ASan for global variable names.
 // It's expected to return a static buffer that will be reused on each call.
 const char *Symbolizer::Demangle(const char *name) {
-  static char buffer[kFormatDemangleMax];
-  internal_snprintf(buffer, sizeof(buffer), kFormatDemangle, name);
-  return buffer;
+  SymbolizerTool *markupSymbolizer = tools_.front();
+  CHECK(markupSymbolizer);
+  return markupSymbolizer->Demangle(name);
 }
 
 // This is used mostly for suppression matching.  Making it work
@@ -66,38 +186,45 @@ bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) { return false; }
 // to render stack frames, but that should be changed to use
 // RenderStackFrame.
 SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
+  SymbolizerTool *markupSymbolizer = tools_.front();
+  CHECK(markupSymbolizer);
+
   SymbolizedStack *s = SymbolizedStack::New(addr);
-  char buffer[kFormatFunctionMax];
-  internal_snprintf(buffer, sizeof(buffer), kFormatFunction, addr);
-  s->info.function = internal_strdup(buffer);
+  markupSymbolizer->SymbolizePC(addr, s);
   return s;
 }
 
 // Always claim we succeeded, so that RenderDataInfo will be called.
 bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
-  info->Clear();
-  info->start = addr;
-  return true;
+  SymbolizerTool *markupSymbolizer = tools_.front();
+  CHECK(markupSymbolizer);
+  return markupSymbolizer->SymbolizeData(addr, info);
 }
 
 // We ignore the format argument to __sanitizer_symbolize_global.
 void RenderData(InternalScopedString *buffer, const char *format,
-                const DataInfo *DI, const char *strip_path_prefix) {
-  buffer->append(kFormatData, DI->start);
+                const DataInfo *DI, bool symbolizer_markup,
+                const char *strip_path_prefix) {
+  RenderDataMarkup(buffer, format, DI, strip_path_prefix);
 }
 
-bool RenderNeedsSymbolization(const char *format) { return false; }
+bool RenderNeedsSymbolization(const char *format, bool symbolizer_markup) {
+  return RenderNeedsSymbolizationMarkup(format);
+}
 
 // We don't support the stack_trace_format flag at all.
 void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
                  uptr address, const AddressInfo *info, bool vs_style,
                  const char *strip_path_prefix) {
-  CHECK(!RenderNeedsSymbolization(format));
-  buffer->append(kFormatFrame, frame_no, address);
+  RenderFrameMarkup(buffer, format, frame_no, address, info, vs_style,
+                    strip_path_prefix);
 }
 
 Symbolizer *Symbolizer::PlatformInit() {
-  return new (symbolizer_allocator_) Symbolizer({});
+  IntrusiveList<SymbolizerTool> tools;
+  SymbolizerTool *tool = MarkupSymbolizer::get(&symbolizer_allocator_);
+  tools.push_back(tool);
+  return new (symbolizer_allocator_) Symbolizer(tools);
 }
 
 void Symbolizer::LateInitialize() { Symbolizer::GetOrInit(); }
@@ -107,7 +234,7 @@ void ReportDeadlySignal(const SignalContext &sig, u32 tid,
                         UnwindSignalStackCallbackType unwind,
                         const void *unwind_context) {}
 
-#if SANITIZER_CAN_SLOW_UNWIND
+#  if SANITIZER_CAN_SLOW_UNWIND
 struct UnwindTraceArg {
   BufferedStackTrace *stack;
   u32 max_depth;
@@ -117,7 +244,8 @@ _Unwind_Reason_Code Unwind_Trace(struct _Unwind_Context *ctx, void *param) {
   UnwindTraceArg *arg = static_cast<UnwindTraceArg *>(param);
   CHECK_LT(arg->stack->size, arg->max_depth);
   uptr pc = _Unwind_GetIP(ctx);
-  if (pc < PAGE_SIZE) return _URC_NORMAL_STOP;
+  if (pc < PAGE_SIZE)
+    return _URC_NORMAL_STOP;
   arg->stack->trace_buffer[arg->stack->size++] = pc;
   return (arg->stack->size == arg->max_depth ? _URC_NORMAL_STOP
                                              : _URC_NO_REASON);
@@ -143,8 +271,8 @@ void BufferedStackTrace::UnwindSlow(uptr pc, void *context, u32 max_depth) {
   CHECK_GE(max_depth, 2);
   UNREACHABLE("signal context doesn't exist");
 }
-#endif  // SANITIZER_CAN_SLOW_UNWIND
-
-}  // namespace __sanitizer
+#  endif  // SANITIZER_CAN_SLOW_UNWIND
 
 #endif  // SANITIZER_SYMBOLIZER_MARKUP
+
+}  // namespace __sanitizer
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.h
new file mode 100644
index 000000000000000..a09278c30bbd216
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.h
@@ -0,0 +1,69 @@
+//===-- sanitizer_symbolizer_markup.h -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between various sanitizers' runtime libraries.
+//
+// Definitions of the offline markup symbolizer.
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_SYMBOLIZER_MARKUP_H
+#define SANITIZER_SYMBOLIZER_MARKUP_H
+
+#include "sanitizer_common.h"
+#include "sanitizer_symbolizer.h"
+#include "sanitizer_symbolizer_internal.h"
+
+namespace __sanitizer {
+
+// This generic support for offline symbolizing is based on the
+// Fuchsia port.  We don't do any actual symbolization per se.
+// Instead, we emit text containing raw addresses and raw linkage
+// symbol names, embedded in Fuchsia's symbolization markup format.
+// Fuchsia's logging infrastructure emits enough information about
+// process memory layout that a post-processing filter can do the
+// symbolization and pretty-print the markup.  See the spec at:
+// https://fuchsia.googlesource.com/zircon/+/master/docs/symbolizer_markup.md
+
+class MarkupSymbolizer final : public SymbolizerTool {
+ public:
+  static MarkupSymbolizer *get(LowLevelAllocator *alloc);
+
+  // This is used in some places for suppression checking, which we
+  // don't really support for Fuchsia.  It's also used in UBSan to
+  // identify a PC location to a function name, so we always fill in
+  // the function member with a string containing markup around the PC
+  // value.
+  // TODO(mcgrathr): Under SANITIZER_GO, it's currently used by TSan
+  // to render stack frames, but that should be changed to use
+  // RenderStackFrame.
+  bool SymbolizePC(uptr addr, SymbolizedStack *stack) override;
+
+  // Always claim we succeeded, so that RenderDataInfo will be called.
+  bool SymbolizeData(uptr addr, DataInfo *info) override;
+
+  // May return NULL if demangling failed.
+  // This is used by UBSan for type names, and by ASan for global variable
+  // names. It's expected to return a static buffer that will be reused on each
+  // call.
+  const char *Demangle(const char *name) override;
+};
+
+// We ignore the format argument to __sanitizer_symbolize_global.
+void RenderDataMarkup(InternalScopedString *, const char *, const DataInfo *,
+                      const char *);
+
+bool RenderNeedsSymbolizationMarkup(const char *format);
+
+// We don't support the stack_trace_format flag at all.
+void RenderFrameMarkup(InternalScopedString *, const char *, int, uptr,
+                       const AddressInfo *, bool, const char *);
+
+void RenderModulesMarkup(InternalScopedString *, const ListOfModules *);
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_SYMBOLIZER_MARKUP_H
\ No newline at end of file
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
index 1a5e38faea88747..c1f89d652904e50 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
@@ -32,6 +32,7 @@
 #  include "sanitizer_symbolizer_internal.h"
 #  include "sanitizer_symbolizer_libbacktrace.h"
 #  include "sanitizer_symbolizer_mac.h"
+#  include "sanitizer_symbolizer_markup.h"
 
 // C++ demangling function, as required by Itanium C++ ABI. This is weak,
 // because we do not require a C++ ABI library to be linked to a program
@@ -470,6 +471,12 @@ static void ChooseSymbolizerTools(IntrusiveList<SymbolizerTool> *list,
     VReport(2, "Symbolizer is disabled.\n");
     return;
   }
+  if (common_flags()->enable_symbolizer_markup) {
+    VReport(2, "Using symbolizer markup.\n");
+    SymbolizerTool *tool = MarkupSymbolizer::get(allocator);
+    list->push_back(tool);
+    return;
+  }
   if (IsAllocatorOutOfMemory()) {
     VReport(2, "Cannot use internal symbolizer: out of memory\n");
   } else if (SymbolizerTool *tool = InternalSymbolizer::get(allocator)) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
index 73915715c5ba2c0..7fd21b90ecf62bd 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
@@ -35,6 +35,7 @@ void ReportErrorSummary(const char *error_type, const AddressInfo &info,
   buff.append("%s ", error_type);
   RenderFrame(&buff, "%L %F", 0, info.address, &info,
               common_flags()->symbolize_vs_style,
+              common_flags()->enable_symbolizer_markup,
               common_flags()->strip_path_prefix);
   ReportErrorSummary(buff.data(), alt_tool_name);
 }
diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cpp
index 489ef4dbb5b7d4c..96c7571574f4c9b 100644
--- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cpp
+++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cpp
@@ -84,7 +84,7 @@ TEST(SanitizerStacktracePrinter, RenderFrame) {
               "%% Frame:%n PC:%p Module:%m ModuleOffset:%o "
               "Function:%f FunctionOffset:%q Source:%s Line:%l "
               "Column:%c",
-              frame_no, info.address, &info, false, "/path/to/");
+              frame_no, info.address, &info, false, false, "/path/to/");
   EXPECT_STREQ("% Frame:42 PC:0x400000 Module:my/module ModuleOffset:0x200 "
                "Function:foo FunctionOffset:0x100 Source:my/source Line:10 "
                "Column:5",
@@ -97,7 +97,7 @@ TEST(SanitizerStacktracePrinter, RenderFrame) {
               "%% Frame:%n PC:%p Module:%m ModuleOffset:%o "
               "Function:%f FunctionOffset:%q Source:%s Line:%l "
               "Column:%c",
-              frame_no, info.address, &info, false, "/path/to/");
+              frame_no, info.address, &info, false, false, "/path/to/");
   EXPECT_STREQ("% Frame:42 PC:0x400000 Module:my/module ModuleOffset:0x200 "
                "Function:bar FunctionOffset:0x100 Source:my/source Line:10 "
                "Column:5",
@@ -107,26 +107,26 @@ TEST(SanitizerStacktracePrinter, RenderFrame) {
 
   // Test special format specifiers.
   info.address = 0x400000;
-  RenderFrame(&str, "%M", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%M", frame_no, info.address, &info, false, false);
   EXPECT_NE(nullptr, internal_strstr(str.data(), "400000"));
   str.clear();
 
-  RenderFrame(&str, "%L", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%L", frame_no, info.address, &info, false, false);
   EXPECT_STREQ("(<unknown module>)", str.data());
   str.clear();
 
   info.module = internal_strdup("/path/to/module");
   info.module_offset = 0x200;
-  RenderFrame(&str, "%M", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%M", frame_no, info.address, &info, false, false);
   EXPECT_NE(nullptr, internal_strstr(str.data(), "(module+0x"));
   EXPECT_NE(nullptr, internal_strstr(str.data(), "200"));
   str.clear();
 
-  RenderFrame(&str, "%L", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%L", frame_no, info.address, &info, false, false);
   EXPECT_STREQ("(/path/to/module+0x200)", str.data());
   str.clear();
 
-  RenderFrame(&str, "%b", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%b", frame_no, info.address, &info, false, false);
   EXPECT_STREQ("", str.data());
   str.clear();
 
@@ -134,7 +134,7 @@ TEST(SanitizerStacktracePrinter, RenderFrame) {
   info.uuid[0] = 0x55;
   info.uuid[1] = 0x66;
 
-  RenderFrame(&str, "%M", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%M", frame_no, info.address, &info, false, false);
   EXPECT_NE(nullptr, internal_strstr(str.data(), "(module+0x"));
   EXPECT_NE(nullptr, internal_strstr(str.data(), "200"));
 #if SANITIZER_APPLE
@@ -144,7 +144,7 @@ TEST(SanitizerStacktracePrinter, RenderFrame) {
 #endif
   str.clear();
 
-  RenderFrame(&str, "%L", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%L", frame_no, info.address, &info, false, false);
 #if SANITIZER_APPLE
   EXPECT_STREQ("(/path/to/module+0x200)", str.data());
 #else
@@ -152,46 +152,46 @@ TEST(SanitizerStacktracePrinter, RenderFrame) {
 #endif
   str.clear();
 
-  RenderFrame(&str, "%b", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%b", frame_no, info.address, &info, false, false);
   EXPECT_STREQ("(BuildId: 5566)", str.data());
   str.clear();
 
   info.function = internal_strdup("my_function");
-  RenderFrame(&str, "%F", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%F", frame_no, info.address, &info, false, false);
   EXPECT_STREQ("in my_function", str.data());
   str.clear();
 
   info.function_offset = 0x100;
-  RenderFrame(&str, "%F %S", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%F %S", frame_no, info.address, &info, false, false);
   EXPECT_STREQ("in my_function+0x100 <null>", str.data());
   str.clear();
 
   info.file = internal_strdup("my_file");
-  RenderFrame(&str, "%F %S", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%F %S", frame_no, info.address, &info, false, false);
   EXPECT_STREQ("in my_function my_file", str.data());
   str.clear();
 
   info.line = 10;
-  RenderFrame(&str, "%F %S", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%F %S", frame_no, info.address, &info, false, false);
   EXPECT_STREQ("in my_function my_file:10", str.data());
   str.clear();
 
   info.column = 5;
-  RenderFrame(&str, "%S %L", frame_no, info.address, &info, false);
+  RenderFrame(&str, "%S %L", frame_no, info.address, &info, false, false);
   EXPECT_STREQ("my_file:10:5 my_file:10:5", str.data());
   str.clear();
 
-  RenderFrame(&str, "%S %L", frame_no, info.address, &info, true);
+  RenderFrame(&str, "%S %L", frame_no, info.address, &info, true, false);
   EXPECT_STREQ("my_file(10,5) my_file(10,5)", str.data());
   str.clear();
 
   info.column = 0;
-  RenderFrame(&str, "%F %S", frame_no, info.address, &info, true);
+  RenderFrame(&str, "%F %S", frame_no, info.address, &info, true, false);
   EXPECT_STREQ("in my_function my_file(10)", str.data());
   str.clear();
 
   info.line = 0;
-  RenderFrame(&str, "%F %S", frame_no, info.address, &info, true);
+  RenderFrame(&str, "%F %S", frame_no, info.address, &info, true, false);
   EXPECT_STREQ("in my_function my_file", str.data());
   str.clear();
 
diff --git a/compiler-rt/lib/tsan/rtl/tsan_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_report.cpp
index 3ae666e1212f72c..c31b30d98ae34b7 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_report.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_report.cpp
@@ -103,12 +103,22 @@ void PrintStack(const ReportStack *ent) {
     Printf("    [failed to restore the stack]\n\n");
     return;
   }
+
+  if (const ListOfModules *modules =
+          Symbolizer::GetOrInit()->GetRefreshedListOfModules()) {
+    InternalScopedString modules_res;
+    RenderModules(&modules_res, modules,
+                  common_flags()->enable_symbolizer_markup);
+    Printf("%s", modules_res.data());
+  }
+
   SymbolizedStack *frame = ent->frames;
   for (int i = 0; frame && frame->info.address; frame = frame->next, i++) {
     InternalScopedString res;
     RenderFrame(&res, common_flags()->stack_trace_format, i,
                 frame->info.address, &frame->info,
                 common_flags()->symbolize_vs_style,
+                common_flags()->enable_symbolizer_markup,
                 common_flags()->strip_path_prefix);
     Printf("%s\n", res.data());
   }



More information about the llvm-commits mailing list