[compiler-rt] [sanitizer_symbolizer] Add initial symbolizer markup (PR #72605)

Andres Villegas via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 16 19:17:06 PST 2023


https://github.com/avillega created https://github.com/llvm/llvm-project/pull/72605

for linux

Adds initial support of symbolizer markup for linux. This change adds a runtime sanitizer common flag `enable_symbolizer_markup` that controls the instance of the StackTracePrinter to be used, besides that, it also controls the usage of the implementation of SymbolizerTool that produces symbolizer markup `MarkupSymbolizerTool`.

Comming in future PRs:
- There is some repetition of code between the implementation of the fuchsia symbolizer markup and the more generic symbolizer markup. Making these changes in a different PR will guarantee it is an NFC.
- The RenderContext code uses the Symbolizer list of modules to render the list of modules, according to the symbolizer markup spec. This dependency between the Renderer and Symbolizer can be removed by using a ListOfModules directly and hooking into the right interceptors. Also defering it to make sure it is an NFC.

>From c5b1fab5201a48956654b7633273e7a0fc71abf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Villegas?= <andresvi at google.com>
Date: Thu, 16 Nov 2023 22:38:45 +0000
Subject: [PATCH] [sanitizer_symbolizer] Add initial symbolizer markup for
 linux

Adds initial support of symbolizer markup for linux.
This change adds a runtime sanitizer common flag `enable_symbolizer_markup`
that controls the instance of the StackTracePrinter to be used,
besides that, it also controls the usage of the implementation of
SymbolizerTool that produces symbolizer markup `MarkupSymbolizerTool`.

Comming in future PRs:
- There is some repetition of code between the implementation of the
fuchsia symbolizer markup and the more generic symbolizer markup.
Making these changes in a different PR will guarantee it is an NFC.
- The RenderContext code uses the Symbolizer list of modules
to render the list of modules, according to the symbolizer markup spec.
This dependency between the Renderer and Symbolizer can be
removed by using a ListOfModules directly and hooking into the
right interceptors. Also defering it to make sure it is an NFC.
---
 .../lib/sanitizer_common/CMakeLists.txt       |   4 +-
 .../sanitizer_common_interceptors.inc         |   2 +-
 .../lib/sanitizer_common/sanitizer_flags.inc  |   4 +
 .../lib/sanitizer_common/sanitizer_platform.h |   4 +-
 .../sanitizer_stacktrace_printer.cpp          |  48 +++--
 .../sanitizer_stacktrace_printer.h            |  42 ++--
 .../sanitizer_common/sanitizer_symbolizer.h   |   2 +
 .../sanitizer_symbolizer_libcdep.cpp          |  14 +-
 .../sanitizer_symbolizer_markup.cpp           | 203 ++++++++++++------
 .../sanitizer_symbolizer_markup.h             |  68 ++++++
 .../sanitizer_symbolizer_markup_fuchsia.cpp   |  94 ++++++++
 .../sanitizer_symbolizer_posix_libcdep.cpp    |   7 +
 .../sanitizer_symbolizer_report_fuchsia.cpp   |   4 +-
 .../use-after-free-symbolizer-markup.cpp      |  17 ++
 .../use-after-free-symbolizer-markup.c        |  32 +++
 15 files changed, 424 insertions(+), 121 deletions(-)
 create mode 100644 compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.h
 create mode 100644 compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_fuchsia.cpp
 create mode 100644 compiler-rt/test/asan/TestCases/use-after-free-symbolizer-markup.cpp
 create mode 100644 compiler-rt/test/hwasan/TestCases/use-after-free-symbolizer-markup.c

diff --git a/compiler-rt/lib/sanitizer_common/CMakeLists.txt b/compiler-rt/lib/sanitizer_common/CMakeLists.txt
index 61e832a30eb3767f..99883bd1d54d050f 100644
--- a/compiler-rt/lib/sanitizer_common/CMakeLists.txt
+++ b/compiler-rt/lib/sanitizer_common/CMakeLists.txt
@@ -88,6 +88,7 @@ set(SANITIZER_SYMBOLIZER_SOURCES
   sanitizer_symbolizer_libcdep.cpp
   sanitizer_symbolizer_mac.cpp
   sanitizer_symbolizer_markup.cpp
+  sanitizer_symbolizer_markup_fuchsia.cpp
   sanitizer_symbolizer_posix_libcdep.cpp
   sanitizer_symbolizer_report.cpp
   sanitizer_symbolizer_report_fuchsia.cpp
@@ -191,10 +192,11 @@ set(SANITIZER_IMPL_HEADERS
   sanitizer_stoptheworld.h
   sanitizer_suppressions.h
   sanitizer_symbolizer.h
-  sanitizer_symbolizer_markup_constants.h
   sanitizer_symbolizer_internal.h
   sanitizer_symbolizer_libbacktrace.h
   sanitizer_symbolizer_mac.h
+  sanitizer_symbolizer_markup.h
+  sanitizer_symbolizer_markup_constants.h
   sanitizer_syscall_generic.inc
   sanitizer_syscall_linux_aarch64.inc
   sanitizer_syscall_linux_arm.inc
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 607ecae6808b72af..255bf263208373b9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -6357,7 +6357,7 @@ INTERCEPTOR(int, dlclose, void *handle) {
   COMMON_INTERCEPT_FUNCTION(dlclose);
 #else
 #define INIT_DLOPEN_DLCLOSE
-#endif
+#endif //  SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
 
 #if SANITIZER_INTERCEPT_GETPASS
 INTERCEPTOR(char *, getpass, const char *prompt) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
index 949bdbd148b6b894..490b7d7ecdc4b75a 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
@@ -275,3 +275,7 @@ COMMON_FLAG(bool, test_only_emulate_no_memorymap, false,
 // program.
 COMMON_FLAG(bool, test_only_replace_dlopen_main_program, false,
             "TEST ONLY replace dlopen(<main program>,...) with dlopen(NULL)")
+
+COMMON_FLAG(bool, enable_symbolizer_markup, false,
+    "Use sanitizer symbolizer markup, available on Linux " 
+    "and always set true for fuchsia.")
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h
index 3e1b078a0212f5e2..d5afde7d627800eb 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h
@@ -415,9 +415,9 @@
 
 // Enable offline markup symbolizer for Fuchsia.
 #if SANITIZER_FUCHSIA
-#  define SANITIZER_SYMBOLIZER_MARKUP 1
+#  define SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA 1
 #else
-#  define SANITIZER_SYMBOLIZER_MARKUP 0
+#  define SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA 0
 #endif
 
 // Enable ability to support sanitizer initialization that is
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
index 8db321051e1a0de0..7a3e4827a7e0eeb8 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
@@ -12,28 +12,15 @@
 
 #include "sanitizer_stacktrace_printer.h"
 
+#include "sanitizer_common.h"
 #include "sanitizer_file.h"
 #include "sanitizer_flags.h"
-#include "sanitizer_fuchsia.h"
+#include "sanitizer_symbolizer_markup.h"
 
 namespace __sanitizer {
 
-StackTracePrinter *StackTracePrinter::GetOrInit() {
-  static StackTracePrinter *stacktrace_printer;
-  static StaticSpinMutex init_mu;
-  SpinMutexLock l(&init_mu);
-  if (stacktrace_printer)
-    return stacktrace_printer;
-
-  stacktrace_printer =
-      new (GetGlobalLowLevelAllocator()) FormattedStackTracePrinter();
 
-  CHECK(stacktrace_printer);
-  return stacktrace_printer;
-}
-
-const char *FormattedStackTracePrinter::StripFunctionName(
-    const char *function) {
+const char *StackTracePrinter::StripFunctionName(const char *function) {
   if (!common_flags()->demangle)
     return function;
   if (!function)
@@ -59,8 +46,27 @@ const char *FormattedStackTracePrinter::StripFunctionName(
   return function;
 }
 
-// sanitizer_symbolizer_markup.cpp implements these differently.
-#if !SANITIZER_SYMBOLIZER_MARKUP
+// sanitizer_symbolizer_markup_fuchsia.cpp implements these differently.
+#if !SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
+
+StackTracePrinter *StackTracePrinter::GetOrInit() {
+  static StackTracePrinter *stacktrace_printer;
+  static StaticSpinMutex init_mu;
+  SpinMutexLock l(&init_mu);
+  if (stacktrace_printer)
+    return stacktrace_printer;
+
+  if (common_flags()->enable_symbolizer_markup) {
+    stacktrace_printer =
+        new (GetGlobalLowLevelAllocator()) MarkupStackTracePrinter();
+  } else {
+    stacktrace_printer =
+        new (GetGlobalLowLevelAllocator()) FormattedStackTracePrinter();
+  }
+
+  CHECK(stacktrace_printer);
+  return stacktrace_printer;
+}
 
 static const char *DemangleFunctionName(const char *function) {
   if (!common_flags()->demangle)
@@ -322,9 +328,9 @@ void FormattedStackTracePrinter::RenderData(InternalScopedString *buffer,
   }
 }
 
-#endif  // !SANITIZER_SYMBOLIZER_MARKUP
+#endif  // !SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
 
-void FormattedStackTracePrinter::RenderSourceLocation(
+void StackTracePrinter::RenderSourceLocation(
     InternalScopedString *buffer, const char *file, int line, int column,
     bool vs_style, const char *strip_path_prefix) {
   if (vs_style && line > 0) {
@@ -343,7 +349,7 @@ void FormattedStackTracePrinter::RenderSourceLocation(
   }
 }
 
-void FormattedStackTracePrinter::RenderModuleLocation(
+void StackTracePrinter::RenderModuleLocation(
     InternalScopedString *buffer, const char *module, uptr offset,
     ModuleArch arch, const char *strip_path_prefix) {
   buffer->AppendF("(%s", StripPathPrefix(module, strip_path_prefix));
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h
index 3e02333a8c8d5cc2..a3d2307e87b656ce 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h
@@ -12,6 +12,7 @@
 #ifndef SANITIZER_STACKTRACE_PRINTER_H
 #define SANITIZER_STACKTRACE_PRINTER_H
 
+#include "sanitizer_platform.h"
 #include "sanitizer_common.h"
 #include "sanitizer_internal_defs.h"
 #include "sanitizer_symbolizer.h"
@@ -25,7 +26,18 @@ class StackTracePrinter {
  public:
   static StackTracePrinter *GetOrInit();
 
-  virtual const char *StripFunctionName(const char *function) = 0;
+  // Strip interceptor prefixes from function name.
+  const char *StripFunctionName(const char *function);
+
+  void RenderSourceLocation(InternalScopedString *buffer,
+                                    const char *file, int line, int column,
+                                    bool vs_style,
+                                    const char *strip_path_prefix);
+
+  void RenderModuleLocation(InternalScopedString *buffer,
+                                    const char *module, uptr offset,
+                                    ModuleArch arch,
+                                    const char *strip_path_prefix);
 
   virtual void RenderFrame(InternalScopedString *buffer, const char *format,
                            int frame_no, uptr address, const AddressInfo *info,
@@ -34,15 +46,6 @@ class StackTracePrinter {
 
   virtual bool RenderNeedsSymbolization(const char *format) = 0;
 
-  virtual void RenderSourceLocation(InternalScopedString *buffer,
-                                    const char *file, int line, int column,
-                                    bool vs_style,
-                                    const char *strip_path_prefix) = 0;
-
-  virtual void RenderModuleLocation(InternalScopedString *buffer,
-                                    const char *module, uptr offset,
-                                    ModuleArch arch,
-                                    const char *strip_path_prefix) = 0;
   virtual void RenderData(InternalScopedString *buffer, const char *format,
                           const DataInfo *DI,
                           const char *strip_path_prefix = "") = 0;
@@ -51,11 +54,14 @@ class StackTracePrinter {
   ~StackTracePrinter() {}
 };
 
+
+// See sanitizer_symbolizer_markup.h for the markup implementation of
+// StackTracePrinter. This is code is omited for targets that opt in to
+// use SymbolizerMarkup only.
+#if !SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
+
 class FormattedStackTracePrinter : public StackTracePrinter {
  public:
-  // Strip interceptor prefixes from function name.
-  const char *StripFunctionName(const char *function) override;
-
   // Render the contents of "info" structure, which represents the contents of
   // stack frame "frame_no" and appends it to the "buffer". "format" is a
   // string with placeholders, which is copied to the output with
@@ -90,14 +96,6 @@ class FormattedStackTracePrinter : public StackTracePrinter {
 
   bool RenderNeedsSymbolization(const char *format) override;
 
-  void RenderSourceLocation(InternalScopedString *buffer, const char *file,
-                            int line, int column, bool vs_style,
-                            const char *strip_path_prefix) override;
-
-  void RenderModuleLocation(InternalScopedString *buffer, const char *module,
-                            uptr offset, ModuleArch arch,
-                            const char *strip_path_prefix) override;
-
   // Same as RenderFrame, but for data section (global variables).
   // Accepts %s, %l from above.
   // Also accepts:
@@ -110,6 +108,8 @@ class FormattedStackTracePrinter : public StackTracePrinter {
   ~FormattedStackTracePrinter() {}
 };
 
+#endif // !SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_STACKTRACE_PRINTER_H
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
index 7fb7928dce0d8d4c..40636e9722c461d1 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
@@ -154,6 +154,8 @@ class Symbolizer final {
 
   void InvalidateModuleList();
 
+  ListOfModules &GetRefreshedListOfModules();
+
  private:
   // GetModuleNameAndOffsetForPC has to return a string to the caller.
   // Since the corresponding module might get unloaded later, we should create
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
index 81141023386ea01e..b32ca05eadc38654 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
@@ -26,8 +26,8 @@ Symbolizer *Symbolizer::GetOrInit() {
   return symbolizer_;
 }
 
-// See sanitizer_symbolizer_markup.cpp.
-#if !SANITIZER_SYMBOLIZER_MARKUP
+// See sanitizer_symbolizer_markup_fuchsia.cpp.
+#if !SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
 
 const char *ExtractToken(const char *str, const char *delims, char **result) {
   uptr prefix_len = internal_strcspn(str, delims);
@@ -191,6 +191,14 @@ void Symbolizer::RefreshModules() {
   modules_fresh_ = true;
 }
 
+ListOfModules &Symbolizer::GetRefreshedListOfModules() {
+  if(!modules_fresh_) {
+      RefreshModules();
+  }
+  CHECK(modules_fresh_);
+  return modules_;
+}
+
 static const LoadedModule *SearchForModule(const ListOfModules &modules,
                                            uptr address) {
   for (uptr i = 0; i < modules.size(); i++) {
@@ -566,6 +574,6 @@ bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
   return true;
 }
 
-#endif  // !SANITIZER_SYMBOLIZER_MARKUP
+#endif  // !SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
 
 }  // namespace __sanitizer
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
index 6402cfd7c36e443e..87217595375622e9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
@@ -8,103 +8,166 @@
 //
 // This file is shared between various sanitizers' runtime libraries.
 //
-// Implementation of offline markup symbolizer.
+// This generic support for offline symbolizing is based on the
+// Fuchsia port.  We don't do any actual symbolization per se.
+// Instead, we emit text containing raw addresses and raw linkage
+// symbol names, embedded in Fuchsia's symbolization markup format.
+// See the spec at: https://llvm.org/docs/SymbolizerMarkupFormat.html
 //===----------------------------------------------------------------------===//
 
-#include "sanitizer_platform.h"
-
-#if SANITIZER_SYMBOLIZER_MARKUP
+#include "sanitizer_symbolizer_markup.h"
 
-#  include "sanitizer_common.h"
-#  include "sanitizer_stacktrace_printer.h"
-#  include "sanitizer_symbolizer.h"
-#  include "sanitizer_symbolizer_markup_constants.h"
+#include "sanitizer_common.h"
+#include "sanitizer_libc.h"
+#include "sanitizer_platform.h"
+#include "sanitizer_symbolizer.h"
+#include "sanitizer_symbolizer_markup_constants.h"
 
 namespace __sanitizer {
 
-// This generic support for offline symbolizing is based on the
-// Fuchsia port.  We don't do any actual symbolization per se.
-// Instead, we emit text containing raw addresses and raw linkage
-// symbol names, embedded in Fuchsia's symbolization markup format.
-// Fuchsia's logging infrastructure emits enough information about
-// process memory layout that a post-processing filter can do the
-// symbolization and pretty-print the markup.  See the spec at:
-// https://fuchsia.googlesource.com/zircon/+/master/docs/symbolizer_markup.md
-
-// This is used by UBSan for type names, and by ASan for global variable names.
-// It's expected to return a static buffer that will be reused on each call.
-const char *Symbolizer::Demangle(const char *name) {
-  static char buffer[kFormatDemangleMax];
-  internal_snprintf(buffer, sizeof(buffer), kFormatDemangle, name);
-  return buffer;
+void MarkupStackTracePrinter::RenderData(InternalScopedString *buffer,
+                                         const char *format, const DataInfo *DI,
+                                         const char *strip_path_prefix) {
+  RenderContext(buffer);
+  buffer->AppendF(kFormatData, DI->start);
 }
 
-// This is used mostly for suppression matching.  Making it work
-// would enable "interceptor_via_lib" suppressions.  It's also used
-// once in UBSan to say "in module ..." in a message that also
-// includes an address in the module, so post-processing can already
-// pretty-print that so as to indicate the module.
-bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
-                                             uptr *module_address) {
+bool MarkupStackTracePrinter::RenderNeedsSymbolization(const char *format) {
   return false;
 }
 
-// This is mainly used by hwasan for online symbolization. This isn't needed
-// since hwasan can always just dump stack frames for offline symbolization.
-bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) { return false; }
-
-// This is used in some places for suppression checking, which we
-// don't really support for Fuchsia.  It's also used in UBSan to
-// identify a PC location to a function name, so we always fill in
-// the function member with a string containing markup around the PC
-// value.
-// TODO(mcgrathr): Under SANITIZER_GO, it's currently used by TSan
-// to render stack frames, but that should be changed to use
-// RenderStackFrame.
-SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
-  SymbolizedStack *s = SymbolizedStack::New(addr);
+void MarkupStackTracePrinter::RenderFrame(InternalScopedString *buffer,
+                                          const char *format, int frame_no,
+                                          uptr address, const AddressInfo *info,
+                                          bool vs_style,
+                                          const char *strip_path_prefix) {
+  CHECK(!RenderNeedsSymbolization(format));
+  RenderContext(buffer);
+  buffer->AppendF(kFormatFrame, frame_no, address);
+}
+
+bool MarkupSymbolizerTool::SymbolizePC(uptr addr, SymbolizedStack *stack) {
   char buffer[kFormatFunctionMax];
   internal_snprintf(buffer, sizeof(buffer), kFormatFunction, addr);
-  s->info.function = internal_strdup(buffer);
-  return s;
+  stack->info.function = internal_strdup(buffer);
+  return true;
 }
 
-// Always claim we succeeded, so that RenderDataInfo will be called.
-bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
+bool MarkupSymbolizerTool::SymbolizeData(uptr addr, DataInfo *info) {
   info->Clear();
   info->start = addr;
   return true;
 }
 
-// We ignore the format argument to __sanitizer_symbolize_global.
-void FormattedStackTracePrinter::RenderData(InternalScopedString *buffer,
-                                            const char *format,
-                                            const DataInfo *DI,
-                                            const char *strip_path_prefix) {
-  buffer->AppendF(kFormatData, DI->start);
+// This is used by UBSan for type names, and by ASan for global variable names.
+// It's expected to return a static buffer that will be reused on each call.
+const char *MarkupSymbolizerTool::Demangle(const char *name) {
+  static char buffer[kFormatDemangleMax];
+  internal_snprintf(buffer, sizeof(buffer), kFormatDemangle, name);
+  return buffer;
 }
 
-bool FormattedStackTracePrinter::RenderNeedsSymbolization(const char *format) {
-  return false;
+// Fuchsia's implementation of symbolizer markup doesn't need to emit contextual
+// elements at this point.
+// Fuchsia's logging infrastructure emits enough information about
+// process memory layout that a post-processing filter can do the
+// symbolization and pretty-print the markup.
+#if !SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
+
+// Simplier view of a LoadedModule. It only holds information necessary to
+// identify unique modules.
+struct RenderedModule {
+  char *full_name;
+  uptr base_address;
+  u8 uuid[kModuleUUIDSize];  // BuildId
+};
+
+static bool ModulesEq(const LoadedModule &module,
+                      const RenderedModule &renderedModule) {
+  return module.base_address() == renderedModule.base_address &&
+         internal_memcmp(module.uuid(), renderedModule.uuid,
+                         module.uuid_size()) == 0 &&
+         internal_strcmp(module.full_name(), renderedModule.full_name) == 0;
 }
 
-// We don't support the stack_trace_format flag at all.
-void FormattedStackTracePrinter::RenderFrame(InternalScopedString *buffer,
-                                             const char *format, int frame_no,
-                                             uptr address,
-                                             const AddressInfo *info,
-                                             bool vs_style,
-                                             const char *strip_path_prefix) {
-  CHECK(!RenderNeedsSymbolization(format));
-  buffer->AppendF(kFormatFrame, frame_no, address);
+static bool ModuleHasBeenRendered(
+    const LoadedModule &module,
+    const InternalMmapVectorNoCtor<RenderedModule> &renderedModules) {
+  for (const auto &renderedModule : renderedModules) {
+    if (ModulesEq(module, renderedModule)) {
+      return true;
+    }
+  }
+  return false;
 }
 
-Symbolizer *Symbolizer::PlatformInit() {
-  return new (symbolizer_allocator_) Symbolizer({});
+static void RenderModule(InternalScopedString *buffer,
+                         const LoadedModule &module, uptr moduleId) {
+  buffer->AppendF("{{{module:%d:%s:elf:", moduleId, module.full_name());
+  for (uptr i = 0; i < module.uuid_size(); i++) {
+    buffer->AppendF("%02x", module.uuid()[i]);
+  }
+  buffer->Append("}}}\n");
 }
 
-void Symbolizer::LateInitialize() { Symbolizer::GetOrInit(); }
+static void RenderMmaps(InternalScopedString *buffer,
+                        const LoadedModule &module, uptr moduleId) {
+  for (const auto &range : module.ranges()) {
+    //{{{mmap:starting_addr:size_in_hex:load:module_Id:r(w|x):relative_addr}}}
+    buffer->AppendF("{{{mmap:%p:%p:load:%d:", range.beg, range.end - range.beg,
+                    moduleId);
+
+    // All module mmaps are readable at least
+    buffer->Append("r");
+    if (range.writable)
+      buffer->Append("w");
+    if (range.executable)
+      buffer->Append("x");
+
+    // module.base_address == dlpi_addr
+    // range.beg == dlpi_addr + p_vaddr
+    // relative address == p_vaddr == range.beg - module.base_address
+    buffer->AppendF(":%p}}}\n", range.beg - module.base_address());
+  }
+}
 
+void MarkupStackTracePrinter::RenderContext(InternalScopedString *buffer) {
+  // Keeps track of the modules that have been rendered.
+  static bool initialized = false;
+  static InternalMmapVectorNoCtor<RenderedModule> renderedModules;
+  if (!initialized) {
+    // arbitrary initial size, counting the main module plus some important libs
+    // like libc.
+    renderedModules.Initialize(3);
+    initialized = true;
+  }
+
+  if (renderedModules.size() == 0) {
+    buffer->Append("{{{reset}}}\n");
+  }
+
+  const auto &modules = Symbolizer::GetOrInit()->GetRefreshedListOfModules();
+
+  for (const auto &module : modules) {
+    if (ModuleHasBeenRendered(module, renderedModules)) {
+      continue;
+    }
+
+    // symbolizer markup id, used to refer to this modules from other contextual
+    // elements
+    uptr moduleId = renderedModules.size();
+
+    RenderModule(buffer, module, moduleId);
+    RenderMmaps(buffer, module, moduleId);
+
+    RenderedModule renderedModule{
+        internal_strdup(module.full_name()), module.base_address(), {}};
+
+    // kModuleUUIDSize is the size of curModule.uuid
+    CHECK_GE(kModuleUUIDSize, module.uuid_size());
+    internal_memcpy(renderedModule.uuid, module.uuid(), module.uuid_size());
+    renderedModules.push_back(renderedModule);
+  }
+}
+#endif  // !SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
 }  // namespace __sanitizer
-
-#endif  // SANITIZER_SYMBOLIZER_MARKUP
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.h
new file mode 100644
index 0000000000000000..419dd00732a77a4a
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.h
@@ -0,0 +1,68 @@
+//===-- sanitizer_symbolizer_markup.h -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between various sanitizers' runtime libraries.
+//
+//  Header for the offline markup symbolizer.
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_SYMBOLIZER_MARKUP_H
+#define SANITIZER_SYMBOLIZER_MARKUP_H
+
+#include "sanitizer_common.h"
+#include "sanitizer_stacktrace_printer.h"
+#include "sanitizer_symbolizer.h"
+#include "sanitizer_symbolizer_internal.h"
+
+namespace __sanitizer {
+
+class MarkupSymbolizerTool final : public SymbolizerTool {
+ public:
+  // This is used in some places for suppression checking, which we
+  // don't really support for Fuchsia.  It's also used in UBSan to
+  // identify a PC location to a function name, so we always fill in
+  // the function member with a string containing markup around the PC
+  // value.
+  // TODO(mcgrathr): Under SANITIZER_GO, it's currently used by TSan
+  // to render stack frames, but that should be changed to use
+  // RenderStackFrame.
+  bool SymbolizePC(uptr addr, SymbolizedStack *stack) override;
+
+  // Always claim we succeeded, so that RenderDataInfo will be called.
+  bool SymbolizeData(uptr addr, DataInfo *info) override;
+
+  // May return NULL if demangling failed.
+  // This is used by UBSan for type names, and by ASan for global variable
+  // names. It's expected to return a static buffer that will be reused on each
+  // call.
+  const char *Demangle(const char *name) override;
+};
+
+class MarkupStackTracePrinter : public StackTracePrinter {
+ public:
+  // We don't support the stack_trace_format flag at all.
+  void RenderFrame(InternalScopedString *buffer, const char *format,
+                   int frame_no, uptr address, const AddressInfo *info,
+                   bool vs_style, const char *strip_path_prefix = "") override;
+
+  bool RenderNeedsSymbolization(const char *format) override;
+
+  // We ignore the format argument to __sanitizer_symbolize_global.
+  void RenderData(InternalScopedString *buffer, const char *format,
+                  const DataInfo *DI,
+                  const char *strip_path_prefix = "") override;
+
+ private:
+  void RenderContext(InternalScopedString *buffer);
+
+ protected:
+  ~MarkupStackTracePrinter() {}
+};
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_SYMBOLIZER_MARKUP_H
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_fuchsia.cpp
new file mode 100644
index 0000000000000000..bb5401c478e5c112
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_fuchsia.cpp
@@ -0,0 +1,94 @@
+
+//===-- sanitizer_symbolizer_markup.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between various sanitizers' runtime libraries.
+// This is the implementation of the fuchsia specifics for symbolizer markup.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_platform.h"
+#if SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
+#include "sanitizer_symbolizer_markup.h"
+
+#include "sanitizer_common.h"
+#include "sanitizer_symbolizer.h"
+#include "sanitizer_stacktrace_printer.h"
+#include "sanitizer_symbolizer_markup_constants.h"
+
+namespace __sanitizer {
+
+void MarkupStackTracePrinter::RenderContext(InternalScopedString *){};
+
+// Fuchsia always uses MarkupStackTracePrinter
+StackTracePrinter *StackTracePrinter::GetOrInit() {
+  static StackTracePrinter *stacktrace_printer;
+  static StaticSpinMutex init_mu;
+  SpinMutexLock l(&init_mu);
+  if (stacktrace_printer)
+    return stacktrace_printer;
+
+  stacktrace_printer =
+      new (GetGlobalLowLevelAllocator()) MarkupStackTracePrinter();
+
+  CHECK(stacktrace_printer);
+  return stacktrace_printer;
+}
+
+// This is used by UBSan for type names, and by ASan for global variable names.
+// It's expected to return a static buffer that will be reused on each call.
+const char *Symbolizer::Demangle(const char *name) {
+  static char buffer[kFormatDemangleMax];
+  internal_snprintf(buffer, sizeof(buffer), kFormatDemangle, name);
+  return buffer;
+}
+
+// This is used mostly for suppression matching.  Making it work
+// would enable "interceptor_via_lib" suppressions.  It's also used
+// once in UBSan to say "in module ..." in a message that also
+// includes an address in the module, so post-processing can already
+// pretty-print that so as to indicate the module.
+bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
+                                             uptr *module_address) {
+  return false;
+}
+
+// This is mainly used by hwasan for online symbolization. This isn't needed
+// since hwasan can always just dump stack frames for offline symbolization.
+bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) { return false; }
+
+// This is used in some places for suppression checking, which we
+// don't really support for Fuchsia.  It's also used in UBSan to
+// identify a PC location to a function name, so we always fill in
+// the function member with a string containing markup around the PC
+// value.
+// TODO(mcgrathr): Under SANITIZER_GO, it's currently used by TSan
+// to render stack frames, but that should be changed to use
+// RenderStackFrame.
+SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
+  SymbolizedStack *s = SymbolizedStack::New(addr);
+  char buffer[kFormatFunctionMax];
+  internal_snprintf(buffer, sizeof(buffer), kFormatFunction, addr);
+  s->info.function = internal_strdup(buffer);
+  return s;
+}
+
+// Always claim we succeeded, so that RenderDataInfo will be called.
+bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
+  info->Clear();
+  info->start = addr;
+  return true;
+}
+
+Symbolizer *Symbolizer::PlatformInit() {
+  return new (symbolizer_allocator_) Symbolizer({});
+}
+
+void Symbolizer::LateInitialize() { Symbolizer::GetOrInit(); }
+
+}  // namespace __sanitizer
+#endif  // SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
index d92349c04fffabd5..101803579d02e094 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_platform.h"
+#include "sanitizer_symbolizer_markup.h"
 #if SANITIZER_POSIX
 #  include <dlfcn.h>  // for dlsym()
 #  include <errno.h>
@@ -475,6 +476,12 @@ static void ChooseSymbolizerTools(IntrusiveList<SymbolizerTool> *list,
     VReport(2, "Symbolizer is disabled.\n");
     return;
   }
+  if(common_flags()->enable_symbolizer_markup) {
+      VReport(2, "Symbolizer markup enabled.\n");
+      SymbolizerTool *tool = new(*allocator) MarkupSymbolizerTool();
+      list->push_back(tool);
+      return;
+  }
   if (IsAllocatorOutOfMemory()) {
     VReport(2, "Cannot use internal symbolizer: out of memory\n");
   } else if (SymbolizerTool *tool = InternalSymbolizer::get(allocator)) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report_fuchsia.cpp
index fb49cfbb304704ae..3e001fa8863f8a20 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report_fuchsia.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report_fuchsia.cpp
@@ -13,7 +13,7 @@
 
 #include "sanitizer_platform.h"
 
-#if SANITIZER_SYMBOLIZER_MARKUP
+#if SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
 
 #  include "sanitizer_common.h"
 
@@ -30,4 +30,4 @@ void HandleDeadlySignal(void *siginfo, void *context, u32 tid,
 
 }  // namespace __sanitizer
 
-#endif  // SANITIZER_SYMBOLIZER_MARKUP
+#endif  // SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
diff --git a/compiler-rt/test/asan/TestCases/use-after-free-symbolizer-markup.cpp b/compiler-rt/test/asan/TestCases/use-after-free-symbolizer-markup.cpp
new file mode 100644
index 0000000000000000..6e972c88f6d488a6
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/use-after-free-symbolizer-markup.cpp
@@ -0,0 +1,17 @@
+// Test that verifies that asan produces valid symbolizer markup when enabled.
+// RUN: %clangxx_asan -O1 %s -o %t 
+// RUN: env ASAN_OPTIONS=enable_symbolizer_markup=1 not %run %t 2>&1 | FileCheck %s
+// REQUIRES: linux
+
+#include <stdlib.h>
+int main() {
+  char *x = (char*)malloc(10 * sizeof(char));
+  free(x);
+  return x[5];
+}
+// COM: For element syntax see: https://llvm.org/docs/SymbolizerMarkupFormat.html
+// COM: OPEN is {{{ and CLOSE is }}}
+// CHECK: [[OPEN:{{{]]reset[[CLOSE:}}}]]
+// CHECK: [[OPEN]]module:[[MOD_ID:[0-9]+]]:{{.+}}:elf:{{[0-9a-fA-F]+}}[[CLOSE]]
+// CHECK: [[OPEN]]mmap:{{0x[0-9a-fA-F]+:0x[0-9a-fA-F]+}}:load:[[MOD_ID]]:{{r[wx]{0,2}:0x[0-9a-fA-F]+}}[[CLOSE]]
+// CHECK: [[OPEN]]bt:{{[0-9]+}}:0x{{[0-9a-fA-F]+}}[[CLOSE]]
diff --git a/compiler-rt/test/hwasan/TestCases/use-after-free-symbolizer-markup.c b/compiler-rt/test/hwasan/TestCases/use-after-free-symbolizer-markup.c
new file mode 100644
index 0000000000000000..39d1b1f96d52b59b
--- /dev/null
+++ b/compiler-rt/test/hwasan/TestCases/use-after-free-symbolizer-markup.c
@@ -0,0 +1,32 @@
+// Test that verifies that hwasan produces valid symbolizer markup when enabled.
+// RUN: %clang_hwasan -O0 %s -o %t
+// RUN: env HWASAN_OPTIONS=enable_symbolizer_markup=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_hwasan -O1 %s -o %t
+// RUN: env HWASAN_OPTIONS=enable_symbolizer_markup=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_hwasan -O2 %s -o %t
+// RUN: env HWASAN_OPTIONS=enable_symbolizer_markup=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_hwasan -O3 %s -o %t
+// RUN: env HWASAN_OPTIONS=enable_symbolizer_markup=1 not %run %t 2>&1 | FileCheck %s
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sanitizer/hwasan_interface.h>
+
+int main() {
+  __hwasan_enable_allocator_tagging();
+  char * volatile x = (char*)malloc(10);
+  free(x);
+  __hwasan_disable_allocator_tagging();
+
+  int r = 0;
+  r = x[5];
+  return r;
+}
+
+// COM: For element syntax see: https://llvm.org/docs/SymbolizerMarkupFormat.html
+// COM: OPEN is {{{ and CLOSE is }}}
+// CHECK: [[OPEN:{{{]]reset[[CLOSE:}}}]]
+// CHECK: [[OPEN]]module:[[MOD_ID:[0-9]+]]:{{.+}}:elf:{{[0-9a-fA-F]+}}[[CLOSE]]
+// CHECK: [[OPEN]]mmap:{{0x[0-9a-fA-F]+:0x[0-9a-fA-F]+}}:load:[[MOD_ID]]:{{r[wx]{0,2}:0x[0-9a-fA-F]+}}[[CLOSE]]
+// CHECK: [[OPEN]]bt:{{[0-9]+}}:0x{{[0-9a-fA-F]+}}[[CLOSE]]



More information about the llvm-commits mailing list