[compiler-rt] [sanitizer_symbolizer] Add initial symbolizer markup (PR #72605)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 17 13:22:48 PST 2023


=?utf-8?q?Andrés?= Villegas <andresvi at google.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/72605 at github.com>


================
@@ -8,103 +8,166 @@
 //
 // This file is shared between various sanitizers' runtime libraries.
 //
-// Implementation of offline markup symbolizer.
+// This generic support for offline symbolizing is based on the
+// Fuchsia port.  We don't do any actual symbolization per se.
+// Instead, we emit text containing raw addresses and raw linkage
+// symbol names, embedded in Fuchsia's symbolization markup format.
+// See the spec at: https://llvm.org/docs/SymbolizerMarkupFormat.html
 //===----------------------------------------------------------------------===//
 
-#include "sanitizer_platform.h"
-
-#if SANITIZER_SYMBOLIZER_MARKUP
+#include "sanitizer_symbolizer_markup.h"
 
-#  include "sanitizer_common.h"
-#  include "sanitizer_stacktrace_printer.h"
-#  include "sanitizer_symbolizer.h"
-#  include "sanitizer_symbolizer_markup_constants.h"
+#include "sanitizer_common.h"
+#include "sanitizer_libc.h"
+#include "sanitizer_platform.h"
+#include "sanitizer_symbolizer.h"
+#include "sanitizer_symbolizer_markup_constants.h"
 
 namespace __sanitizer {
 
-// This generic support for offline symbolizing is based on the
-// Fuchsia port.  We don't do any actual symbolization per se.
-// Instead, we emit text containing raw addresses and raw linkage
-// symbol names, embedded in Fuchsia's symbolization markup format.
-// Fuchsia's logging infrastructure emits enough information about
-// process memory layout that a post-processing filter can do the
-// symbolization and pretty-print the markup.  See the spec at:
-// https://fuchsia.googlesource.com/zircon/+/master/docs/symbolizer_markup.md
-
-// This is used by UBSan for type names, and by ASan for global variable names.
-// It's expected to return a static buffer that will be reused on each call.
-const char *Symbolizer::Demangle(const char *name) {
-  static char buffer[kFormatDemangleMax];
-  internal_snprintf(buffer, sizeof(buffer), kFormatDemangle, name);
-  return buffer;
+void MarkupStackTracePrinter::RenderData(InternalScopedString *buffer,
+                                         const char *format, const DataInfo *DI,
+                                         const char *strip_path_prefix) {
+  RenderContext(buffer);
+  buffer->AppendF(kFormatData, DI->start);
 }
 
-// This is used mostly for suppression matching.  Making it work
-// would enable "interceptor_via_lib" suppressions.  It's also used
-// once in UBSan to say "in module ..." in a message that also
-// includes an address in the module, so post-processing can already
-// pretty-print that so as to indicate the module.
-bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
-                                             uptr *module_address) {
+bool MarkupStackTracePrinter::RenderNeedsSymbolization(const char *format) {
   return false;
 }
 
-// This is mainly used by hwasan for online symbolization. This isn't needed
-// since hwasan can always just dump stack frames for offline symbolization.
-bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) { return false; }
-
-// This is used in some places for suppression checking, which we
-// don't really support for Fuchsia.  It's also used in UBSan to
-// identify a PC location to a function name, so we always fill in
-// the function member with a string containing markup around the PC
-// value.
-// TODO(mcgrathr): Under SANITIZER_GO, it's currently used by TSan
-// to render stack frames, but that should be changed to use
-// RenderStackFrame.
-SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
-  SymbolizedStack *s = SymbolizedStack::New(addr);
+void MarkupStackTracePrinter::RenderFrame(InternalScopedString *buffer,
+                                          const char *format, int frame_no,
+                                          uptr address, const AddressInfo *info,
+                                          bool vs_style,
+                                          const char *strip_path_prefix) {
+  CHECK(!RenderNeedsSymbolization(format));
+  RenderContext(buffer);
+  buffer->AppendF(kFormatFrame, frame_no, address);
+}
+
+bool MarkupSymbolizerTool::SymbolizePC(uptr addr, SymbolizedStack *stack) {
   char buffer[kFormatFunctionMax];
   internal_snprintf(buffer, sizeof(buffer), kFormatFunction, addr);
-  s->info.function = internal_strdup(buffer);
-  return s;
+  stack->info.function = internal_strdup(buffer);
+  return true;
 }
 
-// Always claim we succeeded, so that RenderDataInfo will be called.
-bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
+bool MarkupSymbolizerTool::SymbolizeData(uptr addr, DataInfo *info) {
   info->Clear();
   info->start = addr;
   return true;
 }
 
-// We ignore the format argument to __sanitizer_symbolize_global.
-void FormattedStackTracePrinter::RenderData(InternalScopedString *buffer,
-                                            const char *format,
-                                            const DataInfo *DI,
-                                            const char *strip_path_prefix) {
-  buffer->AppendF(kFormatData, DI->start);
+// This is used by UBSan for type names, and by ASan for global variable names.
+// It's expected to return a static buffer that will be reused on each call.
+const char *MarkupSymbolizerTool::Demangle(const char *name) {
+  static char buffer[kFormatDemangleMax];
+  internal_snprintf(buffer, sizeof(buffer), kFormatDemangle, name);
+  return buffer;
 }
 
-bool FormattedStackTracePrinter::RenderNeedsSymbolization(const char *format) {
-  return false;
+// Fuchsia's implementation of symbolizer markup doesn't need to emit contextual
+// elements at this point.
+// Fuchsia's logging infrastructure emits enough information about
+// process memory layout that a post-processing filter can do the
+// symbolization and pretty-print the markup.
+#if !SANITIZER_SYMBOLIZER_MARKUP_FUCHSIA
+
+// Simplier view of a LoadedModule. It only holds information necessary to
+// identify unique modules.
+struct RenderedModule {
+  char *full_name;
+  uptr base_address;
+  u8 uuid[kModuleUUIDSize];  // BuildId
+};
+
+static bool ModulesEq(const LoadedModule &module,
+                      const RenderedModule &renderedModule) {
+  return module.base_address() == renderedModule.base_address &&
+         internal_memcmp(module.uuid(), renderedModule.uuid,
+                         module.uuid_size()) == 0 &&
+         internal_strcmp(module.full_name(), renderedModule.full_name) == 0;
 }
 
-// We don't support the stack_trace_format flag at all.
-void FormattedStackTracePrinter::RenderFrame(InternalScopedString *buffer,
-                                             const char *format, int frame_no,
-                                             uptr address,
-                                             const AddressInfo *info,
-                                             bool vs_style,
-                                             const char *strip_path_prefix) {
-  CHECK(!RenderNeedsSymbolization(format));
-  buffer->AppendF(kFormatFrame, frame_no, address);
+static bool ModuleHasBeenRendered(
+    const LoadedModule &module,
+    const InternalMmapVectorNoCtor<RenderedModule> &renderedModules) {
+  for (const auto &renderedModule : renderedModules) {
+    if (ModulesEq(module, renderedModule)) {
+      return true;
+    }
+  }
+  return false;
 }
 
-Symbolizer *Symbolizer::PlatformInit() {
-  return new (symbolizer_allocator_) Symbolizer({});
+static void RenderModule(InternalScopedString *buffer,
+                         const LoadedModule &module, uptr moduleId) {
+  buffer->AppendF("{{{module:%d:%s:elf:", moduleId, module.full_name());
+  for (uptr i = 0; i < module.uuid_size(); i++) {
+    buffer->AppendF("%02x", module.uuid()[i]);
+  }
+  buffer->Append("}}}\n");
 }
 
-void Symbolizer::LateInitialize() { Symbolizer::GetOrInit(); }
+static void RenderMmaps(InternalScopedString *buffer,
+                        const LoadedModule &module, uptr moduleId) {
+  for (const auto &range : module.ranges()) {
+    //{{{mmap:starting_addr:size_in_hex:load:module_Id:r(w|x):relative_addr}}}
+    buffer->AppendF("{{{mmap:%p:%p:load:%d:", range.beg, range.end - range.beg,
----------------
PiJoules wrote:

Could this also be a constexpr string added to `sanitizer_symbolizer_markup_constants.h`? Something like: "`{{{mmap:%p:%p:load:%d:%s:%p}}}`?

https://github.com/llvm/llvm-project/pull/72605


More information about the llvm-commits mailing list