[clang-tools-extra] [sanitizer_symbolizer] Symbolizer Markup for linux (PR #66126)

via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 12 11:47:13 PDT 2023


=?utf-8?q?Andrés?= Villegas <andresvi at google.com>,
=?utf-8?q?Andrés?= Villegas <andresvi at google.com>,
=?utf-8?q?Andrés?= Villegas <andresvi at google.com>


llvmbot wrote:

@llvm/pr-subscribers-compiler-rt-sanitizer

<details>
<summary>Changes</summary>

This change adds support for sanitizer symbolizer markup for linux.
For more informationa about symbolzier markup please visit https://llvm.org/docs/SymbolizerMarkupFormat.html
--

Patch is 36.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66126.diff

16 Files Affected:

- (modified) compiler-rt/lib/hwasan/hwasan_report.cpp (+42-1) 
- (modified) compiler-rt/lib/sanitizer_common/CMakeLists.txt (+1) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_flags.inc (+3) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp (+57-8) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp (+17-4) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h (+4-3) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h (+4-1) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h (-1) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp (+12) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp (+149-26) 
- (added) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.h (+66) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp (+6) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp (+1) 
- (modified) compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cpp (+27-18) 
- (modified) compiler-rt/lib/tsan/rtl/tsan_report.cpp (+13) 
- (added) compiler-rt/test/asan/TestCases/Linux/use-after-free-symbolizer-markup.cpp (+18) 


<pre>
diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp
index efe6f57704919a..a91adbbad91132 100644
--- a/compiler-rt/lib/hwasan/hwasan_report.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_report.cpp
@@ -30,6 +30,7 @@
 #include "sanitizer_common/sanitizer_stackdepot.h"
 #include "sanitizer_common/sanitizer_stacktrace_printer.h"
 #include "sanitizer_common/sanitizer_symbolizer.h"
+#include "sanitizer_common/sanitizer_symbolizer_markup.h"
 
 using namespace __sanitizer;
 
@@ -248,6 +249,42 @@ static void PrintStackAllocations(StackAllocationsRingBuffer *sa,
     if (SymbolizedStack *frame = Symbolizer::GetOrInit()->SymbolizePC(pc)) {
       RenderFrame(&frame_desc, " %F %L", 0, frame->info.address, &frame->info,
                   common_flags()->symbolize_vs_style,
+                  common_flags()->enable_symbolizer_markup,
+                  common_flags()->strip_path_prefix);
+      frame->ClearAll();
+    }
+    Printf("%s\n", frame_desc.data());
+    frame_desc.clear();
+  }
+}
+
+
+static void PrintStackAllocationsMarkup(StackAllocationsRingBuffer *sa) {
+  // For symbolizer markup it is just necessary to have dump stack
+  // frames for offline symbolization.
+
+  uptr frames = Min((uptr)flags()->stack_history_size, sa->size());
+
+  if (const ListOfModules *modules =
+          Symbolizer::GetOrInit()->GetRefreshedListOfModules()) {
+    InternalScopedString modules_res;
+    RenderModulesMarkup(&modules_res, modules);
+    Printf("%s", modules_res.data());
+  }
+
+  InternalScopedString frame_desc;
+  Printf("Previously allocated frames:\n");
+  for (uptr i = 0; i < frames; i++) {
+    const uptr *record_addr = &(*sa)[i];
+    uptr record = *record_addr;
+    if (!record)
+      break;
+    uptr pc_mask = (1ULL << 48) - 1;
+    uptr pc = record & pc_mask;
+    if (SymbolizedStack *frame = Symbolizer::GetOrInit()->SymbolizePC(pc)) {
+      RenderFrame(&frame_desc, "", 0, frame->info.address, &frame->info,
+                  common_flags()->symbolize_vs_style,
+                  /*symbolizer_markup=*/ true,
                   common_flags()->strip_path_prefix);
       frame->ClearAll();
     }
@@ -425,7 +462,11 @@ void PrintAddressDescription(
       auto *sa = (t == GetCurrentThread() && current_stack_allocations)
                      ? current_stack_allocations
                      : t->stack_allocations();
-      PrintStackAllocations(sa, addr_tag, untagged_addr);
+      if (common_flags()->enable_symbolizer_markup) {
+        PrintStackAllocationsMarkup(sa);
+      } else {
+        PrintStackAllocations(sa, addr_tag, untagged_addr);
+      }
       num_descriptions_printed++;
     }
   });
diff --git a/compiler-rt/lib/sanitizer_common/CMakeLists.txt b/compiler-rt/lib/sanitizer_common/CMakeLists.txt
index 25304b71c0c706..16084de94e1678 100644
--- a/compiler-rt/lib/sanitizer_common/CMakeLists.txt
+++ b/compiler-rt/lib/sanitizer_common/CMakeLists.txt
@@ -191,6 +191,7 @@ set(SANITIZER_IMPL_HEADERS
   sanitizer_symbolizer_internal.h
   sanitizer_symbolizer_libbacktrace.h
   sanitizer_symbolizer_mac.h
+  sanitizer_symbolizer_markup.h
   sanitizer_syscall_generic.inc
   sanitizer_syscall_linux_aarch64.inc
   sanitizer_syscall_linux_arm.inc
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
index 6148ae56067cae..a83c2fafe48a6e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
@@ -269,3 +269,6 @@ COMMON_FLAG(bool, detect_write_exec, false,
 COMMON_FLAG(bool, test_only_emulate_no_memorymap, false,
             "TEST ONLY fail to read memory mappings to emulate sanitized "
             "\"init\"")
+COMMON_FLAG(bool, enable_symbolizer_markup, false,
+            "Use sanitizer symbolizer "
+            "markup, available only on Linux and Fuchsia.")
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp
index 47983ee7ec713f..25104a1da0ef79 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp
@@ -11,10 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common.h"
-#include "sanitizer_placement_new.h"
+#include "sanitizer_flags.h"
 #include "sanitizer_stacktrace.h"
 #include "sanitizer_stacktrace_printer.h"
 #include "sanitizer_symbolizer.h"
+#include "sanitizer_symbolizer_markup.h"
 
 namespace __sanitizer {
 
@@ -29,7 +30,7 @@ class StackTraceTextPrinter {
         frame_delimiter_(frame_delimiter),
         output_(output),
         dedup_token_(dedup_token),
-        symbolize_(RenderNeedsSymbolization(stack_trace_fmt)) {}
+        symbolize_(RenderNeedsSymbolization(stack_trace_fmt, false)){}
 
   bool ProcessAddressFrames(uptr pc) {
     SymbolizedStack *frames = symbolize_
@@ -43,6 +44,7 @@ class StackTraceTextPrinter {
       RenderFrame(output_, stack_trace_fmt_, frame_num_++, cur->info.address,
                   symbolize_ ? &cur->info : nullptr,
                   common_flags()->symbolize_vs_style,
+                  common_flags()->enable_symbolizer_markup,
                   common_flags()->strip_path_prefix);
 
       if (prev_len != output_->length())
@@ -77,6 +79,41 @@ class StackTraceTextPrinter {
   const bool symbolize_ = false;
 };
 
+class StackTraceMarkupPrinter {
+ public:
+  StackTraceMarkupPrinter(InternalScopedString *output)
+      : output_(output) {}
+
+  bool ProcessAddressFrames(uptr pc) {
+    SymbolizedStack *frames = Symbolizer::GetOrInit()->SymbolizePC(pc);
+
+    if (!frames)
+      return false;
+
+    if (const ListOfModules *modules =
+            Symbolizer::GetOrInit()->GetRefreshedListOfModules()) {
+      RenderModulesMarkup(output_, modules);
+    }
+
+    for (SymbolizedStack *cur = frames; cur; cur = cur->next) {
+      uptr prev_len = output_->length();
+      RenderFrame(output_, "", frame_num_++, cur->info.address, nullptr,
+                  common_flags()->symbolize_vs_style,
+                  /*symbolizer_markup=*/true,
+                  common_flags()->strip_path_prefix);
+
+      if (prev_len != output_->length())
+        output_->append("%c", '\n');
+    }
+    frames->ClearAll();
+    return true;
+  }
+
+ private:
+  InternalScopedString *output_;
+  uptr frame_num_ = 0;
+};
+
 static void CopyStringToBuffer(const InternalScopedString &str, char *out_buf,
                                uptr out_buf_size) {
   if (!out_buf_size)
@@ -94,9 +131,11 @@ void StackTrace::PrintTo(InternalScopedString *output) const {
   CHECK(output);
 
   InternalScopedString dedup_token;
-  StackTraceTextPrinter printer(common_flags()->stack_trace_format, '\n',
-                                output, &dedup_token);
 
+  StackTraceMarkupPrinter markup_printer(output);
+  StackTraceTextPrinter text_printer(common_flags()->stack_trace_format, '\n',
+                                  output, &dedup_token);
+      
   if (trace == nullptr || size == 0) {
     output->append("    <empty stack>\n\n");
     return;
@@ -106,7 +145,10 @@ void StackTrace::PrintTo(InternalScopedString *output) const {
     // PCs in stack traces are actually the return addresses, that is,
     // addresses of the next instructions after the call.
     uptr pc = GetPreviousInstructionPc(trace[i]);
-    CHECK(printer.ProcessAddressFrames(pc));
+    bool result = common_flags()->enable_symbolizer_markup ?
+              markup_printer.ProcessAddressFrames(pc) :
+              text_printer.ProcessAddressFrames(pc);
+    CHECK(result);
   }
 
   // Always add a trailing empty line after stack trace.
@@ -194,8 +236,14 @@ void __sanitizer_symbolize_pc(uptr pc, const char *fmt, char *out_buf,
   pc = StackTrace::GetPreviousInstructionPc(pc);
 
   InternalScopedString output;
-  StackTraceTextPrinter printer(fmt, '\0', &output, nullptr);
-  if (!printer.ProcessAddressFrames(pc)) {
+  StackTraceMarkupPrinter markup_printer(&output);
+  StackTraceTextPrinter text_printer(fmt, '\0', &output, nullptr);
+                                  
+  bool result = common_flags()->enable_symbolizer_markup ?
+        markup_printer.ProcessAddressFrames(pc) :
+        text_printer.ProcessAddressFrames(pc);
+
+  if (!result) {
     output.clear();
     output.append("<can't symbolize>");
   }
@@ -210,7 +258,8 @@ void __sanitizer_symbolize_global(uptr data_addr, const char *fmt,
   DataInfo DI;
   if (!Symbolizer::GetOrInit()->SymbolizeData(data_addr, &DI)) return;
   InternalScopedString data_desc;
-  RenderData(&data_desc, fmt, &DI, common_flags()->strip_path_prefix);
+  RenderData(&data_desc, fmt, &DI, common_flags()->enable_symbolizer_markup,
+             common_flags()->strip_path_prefix);
   internal_strncpy(out_buf, data_desc.data(), out_buf_size);
   out_buf[out_buf_size - 1] = 0;
 }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
index 45c480d225c7f5..fd4e206c295881 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
@@ -14,7 +14,7 @@
 
 #include "sanitizer_file.h"
 #include "sanitizer_flags.h"
-#include "sanitizer_fuchsia.h"
+#include "sanitizer_symbolizer_markup.h"
 
 namespace __sanitizer {
 
@@ -143,7 +143,11 @@ static const char kDefaultFormat[] = "    #%n %p %F %L";
 
 void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
                  uptr address, const AddressInfo *info, bool vs_style,
-                 const char *strip_path_prefix) {
+                 bool symbolizer_markup, const char *strip_path_prefix) {
+  if (symbolizer_markup) {
+    RenderFrameMarkup(buffer, frame_no, address);
+    return;
+  }
   // info will be null in the case where symbolization is not needed for the
   // given format. This ensures that the code below will get a hard failure
   // rather than print incorrect information in case RenderNeedsSymbolization
@@ -250,7 +254,11 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
   }
 }
 
-bool RenderNeedsSymbolization(const char *format) {
+bool RenderNeedsSymbolization(const char *format, bool symbolizer_markup) {
+  if (symbolizer_markup) {
+    // Online symbolization is never needed for symbolizer markup.
+    return false;
+  }
   if (0 == internal_strcmp(format, "DEFAULT"))
     format = kDefaultFormat;
   for (const char *p = format; *p != '\0'; p++) {
@@ -274,7 +282,12 @@ bool RenderNeedsSymbolization(const char *format) {
 }
 
 void RenderData(InternalScopedString *buffer, const char *format,
-                const DataInfo *DI, const char *strip_path_prefix) {
+                const DataInfo *DI, bool symbolizer_markup,
+                const char *strip_path_prefix) {
+  if (symbolizer_markup) {
+    RenderDataMarkup(buffer, DI);
+    return;
+  }
   for (const char *p = format; *p != '\0'; p++) {
     if (*p != '%') {
       buffer->append("%c", *p);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h
index bf2755a2e8f458..f2674cecaf6390 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h
@@ -50,9 +50,9 @@ const char *StripFunctionName(const char *function);
 //   %M - prints module basename and offset, if it is known, or PC.
 void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
                  uptr address, const AddressInfo *info, bool vs_style,
-                 const char *strip_path_prefix = "");
+                 bool symbolizer_markup, const char *strip_path_prefix = "");
 
-bool RenderNeedsSymbolization(const char *format);
+bool RenderNeedsSymbolization(const char *format, bool symbolizer_markup);
 
 void RenderSourceLocation(InternalScopedString *buffer, const char *file,
                           int line, int column, bool vs_style,
@@ -67,7 +67,8 @@ void RenderModuleLocation(InternalScopedString *buffer, const char *module,
 // Also accepts:
 //   %g - name of the global variable.
 void RenderData(InternalScopedString *buffer, const char *format,
-                const DataInfo *DI, const char *strip_path_prefix = "");
+                const DataInfo *DI, bool symbolizer_markup,
+                const char *strip_path_prefix = "");
 
 }  // namespace __sanitizer
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
index 7fb7928dce0d8d..e2a82d9747d6b3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
@@ -20,7 +20,6 @@
 
 #include "sanitizer_common.h"
 #include "sanitizer_mutex.h"
-#include "sanitizer_vector.h"
 
 namespace __sanitizer {
 
@@ -154,6 +153,10 @@ class Symbolizer final {
 
   void InvalidateModuleList();
 
+  // returns the list of modules if it was refreshed
+  // otherwise returns nullptr.
+  const ListOfModules *GetRefreshedListOfModules();
+
  private:
   // GetModuleNameAndOffsetForPC has to return a string to the caller.
   // Since the corresponding module might get unloaded later, we should create
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
index 3ec4d80105a245..fce98bfabdbeb0 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
@@ -15,7 +15,6 @@
 
 #include "sanitizer_file.h"
 #include "sanitizer_symbolizer.h"
-#include "sanitizer_vector.h"
 
 namespace __sanitizer {
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
index d910aef3f74162..3bb3bde5222167 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
@@ -11,9 +11,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_allocator_internal.h"
+#include "sanitizer_common.h"
+#include "sanitizer_flags.h"
 #include "sanitizer_internal_defs.h"
 #include "sanitizer_platform.h"
 #include "sanitizer_symbolizer_internal.h"
+#include "sanitizer_symbolizer_markup.h"
 
 namespace __sanitizer {
 
@@ -191,6 +194,15 @@ void Symbolizer::RefreshModules() {
   modules_fresh_ = true;
 }
 
+const ListOfModules *Symbolizer::GetRefreshedListOfModules() {
+  if (!modules_fresh_) {
+    RefreshModules();
+  }
+
+  CHECK(modules_fresh_);
+  return &modules_;
+}
+
 static const LoadedModule *SearchForModule(const ListOfModules &modules,
                                            uptr address) {
   for (uptr i = 0; i < modules.size(); i++) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
index c8c10de10d03a2..c91d20184d967d 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
@@ -11,20 +11,137 @@
 // Implementation of offline markup symbolizer.
 //===----------------------------------------------------------------------===//
 
-#include "sanitizer_platform.h"
-#if SANITIZER_SYMBOLIZER_MARKUP
+#include "sanitizer_symbolizer_markup.h"
 
-#if SANITIZER_FUCHSIA
+#include "sanitizer_common.h"
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_libc.h"
+#include "sanitizer_platform.h"
+#include "sanitizer_stacktrace.h"
+#include "sanitizer_symbolizer.h"
 #include "sanitizer_symbolizer_fuchsia.h"
-#  endif
 
-#  include <limits.h>
-#  include <unwind.h>
+namespace __sanitizer {
 
-#  include "sanitizer_stacktrace.h"
-#  include "sanitizer_symbolizer.h"
+void RenderDataMarkup(InternalScopedString *buffer, const DataInfo *DI) {
+  buffer->append(kFormatData, DI->start);
+}
 
-namespace __sanitizer {
+bool RenderNeedsSymbolizationMarkup() { return false; }
+
+void RenderFrameMarkup(InternalScopedString *buffer, int frame_no, uptr address) {
+  CHECK(!RenderNeedsSymbolizationMarkup());
+  buffer->append(kFormatFrame, frame_no, address);
+}
+
+// Simplier view of a LoadedModule. It only holds information necessary to
+// identify unique modules.
+struct RenderedModule {
+  char *full_name;
+  u8 uuid[kModuleUUIDSize];  // BuildId
+  uptr base_address;
+};
+
+bool ModulesEq(const LoadedModule *module,
+               const RenderedModule *renderedModule) {
+  return module->base_address() == renderedModule->base_address &&
+         internal_memcmp(module->uuid(), renderedModule->uuid,
+                         module->uuid_size()) == 0 &&
+         internal_strcmp(module->full_name(), renderedModule->full_name) == 0;
+}
+
+bool ModuleHasBeenRendered(
+    const LoadedModule *module,
+    const InternalMmapVectorNoCtor<RenderedModule> *renderedModules) {
+  for (auto *it = renderedModules->begin(); it != renderedModules->end();
+       ++it) {
+    const auto &renderedModule = *it;
+    if (ModulesEq(module, &renderedModule)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void RenderModulesMarkup(InternalScopedString *buffer,
+                         const ListOfModules *modules) {
+  // Keeps track of the modules that have been rendered.
+  static bool initialized = false;
+  static InternalMmapVectorNoCtor<RenderedModule> renderedModules;
+  if (!initialized) {
+    renderedModules.Initialize(modules->size());
+    initialized = true;
+  }
+
+  if (!renderedModules.size()) {
+    buffer->append("{{{reset}}}\n");
+  }
+
+  for (auto *moduleIt = modules->begin(); moduleIt != modules->end();
+       ++moduleIt) {
+    const LoadedModule &module = *moduleIt;
+
+    if (ModuleHasBeenRendered(&module, &renderedModules)) {
+      continue;
+    }
+
+    buffer->append("{{{module:%d:%s:elf:", renderedModules.size(),
+                   module.full_name());
+    for (uptr i = 0; i < module.uuid_size(); i++) {
+      buffer->append("%02x", module.uuid()[i]);
+    }
+    buffer->append("}}}\n");
+
+    for (const auto &range : module.ranges()) {
+      buffer->append("{{{mmap:%p:%p:load:%d:r", range.beg,
+                     range.end - range.beg, renderedModules.size());
+      if (range.writable)
+        buffer->append("w");
+      if (range.executable)
+        buffer->append("x");
+
+      // module.base_address = dlpi_addr
+      // range.beg = dlpi_addr + p_vaddr
+      // relative address = p_vaddr = range.beg - module.base_address
+      buffer->append(":%p}}}\n", range.beg - module.base_address());
+    }
+
+    renderedModules.push_back({});
+    RenderedModule &curModule = renderedModules.back();
+    curModule.full_name = internal_strdup(module.full_name());
+
+    // kModuleUUIDSize is the...
<truncated>
</pre>

</details>

https://github.com/llvm/llvm-project/pull/66126


More information about the cfe-commits mailing list