[llvm-branch-commits] [compiler-rt] [sanitizer_symbolizer] RenderContextual elements for symbolizer markup. (PR #73194)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Nov 22 18:15:24 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Andres Villegas (avillega)

<details>
<summary>Changes</summary>

This is part of a stack of PRs to add support for symbolizer
markup in linux.

Render contextual symbolizer markup elements. For Fuchsia it is not
neccesarry to emit any context given that Fuchsia's logging
infrastructure already handles emiting it when neccesary.

For more information about contextual symbolizer markup elements:
https://llvm.org/docs/SymbolizerMarkupFormat.html#contextual-elements


---
Full diff: https://github.com/llvm/llvm-project/pull/73194.diff


5 Files Affected:

- (modified) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h (+2) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp (+7) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp (+109) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h (+6) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_fuchsia.cpp (+2) 


``````````diff
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
index 7fb7928dce0d8d4..82cd9bc22791628 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
@@ -154,6 +154,8 @@ class Symbolizer final {
 
   void InvalidateModuleList();
 
+  const ListOfModules &GetRefreshedListOfModules();
+
  private:
   // GetModuleNameAndOffsetForPC has to return a string to the caller.
   // Since the corresponding module might get unloaded later, we should create
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
index 81141023386ea01..74458028ae8f553 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
@@ -191,6 +191,13 @@ void Symbolizer::RefreshModules() {
   modules_fresh_ = true;
 }
 
+const ListOfModules &Symbolizer::GetRefreshedListOfModules() {
+  if (!modules_fresh_)
+    RefreshModules();
+
+  return modules_;
+}
+
 static const LoadedModule *SearchForModule(const ListOfModules &modules,
                                            uptr address) {
   for (uptr i = 0; i < modules.size(); i++) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
index 1627908185f2009..fe9933e942eba55 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp
@@ -28,6 +28,7 @@ namespace __sanitizer {
 void MarkupStackTracePrinter::RenderData(InternalScopedString *buffer,
                                          const char *format, const DataInfo *DI,
                                          const char *strip_path_prefix) {
+  RenderContext(buffer);
   buffer->AppendF(kFormatData, DI->start);
 }
 
@@ -42,6 +43,7 @@ void MarkupStackTracePrinter::RenderFrame(InternalScopedString *buffer,
                                           bool vs_style,
                                           const char *strip_path_prefix) {
   CHECK(!RenderNeedsSymbolization(format));
+  RenderContext(buffer);
   buffer->AppendF(kFormatFrame, frame_no, address);
 }
 
@@ -64,4 +66,111 @@ const char *MarkupSymbolizerTool::Demangle(const char *name) {
   return buffer;
 }
 
+// Fuchsia's implementation of symbolizer markup doesn't need to emit contextual
+// elements at this point.
+// Fuchsia's logging infrastructure emits enough information about
+// process memory layout that a post-processing filter can do the
+// symbolization and pretty-print the markup.
+#if !SANITIZER_FUCHSIA
+
+// Simplier view of a LoadedModule. It only holds information necessary to
+// identify unique modules.
+struct RenderedModule {
+  char *full_name;
+  uptr base_address;
+  u8 uuid[kModuleUUIDSize];  // BuildId
+};
+
+static bool ModulesEq(const LoadedModule &module,
+                      const RenderedModule &renderedModule) {
+  return module.base_address() == renderedModule.base_address &&
+         internal_memcmp(module.uuid(), renderedModule.uuid,
+                         module.uuid_size()) == 0 &&
+         internal_strcmp(module.full_name(), renderedModule.full_name) == 0;
+}
+
+static bool ModuleHasBeenRendered(
+    const LoadedModule &module,
+    const InternalMmapVectorNoCtor<RenderedModule> &renderedModules) {
+  for (const auto &renderedModule : renderedModules)
+    if (ModulesEq(module, renderedModule))
+      return true;
+
+  return false;
+}
+
+static void RenderModule(InternalScopedString *buffer,
+                         const LoadedModule &module, uptr moduleId) {
+  InternalScopedString buildIdBuffer;
+  for (uptr i = 0; i < module.uuid_size(); i++)
+    buildIdBuffer.AppendF("%02x", module.uuid()[i]);
+
+  buffer->AppendF(kFormatModule, moduleId, module.full_name(),
+                  buildIdBuffer.data());
+  buffer->Append("\n");
+}
+
+static void RenderMmaps(InternalScopedString *buffer,
+                        const LoadedModule &module, uptr moduleId) {
+  InternalScopedString accessBuffer;
+
+  // All module mmaps are readable at least
+  for (const auto &range : module.ranges()) {
+    accessBuffer.Append("r");
+    if (range.writable)
+      accessBuffer.Append("w");
+    if (range.executable)
+      accessBuffer.Append("x");
+
+    //{{{mmap:%starting_addr:%size_in_hex:load:%moduleId:r%(w|x):%relative_addr}}}
+
+    // module.base_address == dlpi_addr
+    // range.beg == dlpi_addr + p_vaddr
+    // relative address == p_vaddr == range.beg - module.base_address
+    buffer->AppendF(kFormatMmap, range.beg, range.end - range.beg, moduleId,
+                    accessBuffer.data(), range.beg - module.base_address());
+
+    buffer->Append("\n");
+    accessBuffer.clear();
+  }
+}
+
+void MarkupStackTracePrinter::RenderContext(InternalScopedString *buffer) {
+  // Keeps track of the modules that have been rendered.
+  static bool initialized = false;
+  static InternalMmapVectorNoCtor<RenderedModule> renderedModules;
+  if (!initialized) {
+    // arbitrary initial size, counting the main module plus some important libs
+    // like libc.
+    renderedModules.Initialize(3);
+    initialized = true;
+  }
+
+  if (renderedModules.size() == 0)
+    buffer->Append("{{{reset}}}\n");
+
+  const auto &modules = Symbolizer::GetOrInit()->GetRefreshedListOfModules();
+
+  for (const auto &module : modules) {
+    if (ModuleHasBeenRendered(module, renderedModules))
+      continue;
+
+    // symbolizer markup id, used to refer to this modules from other contextual
+    // elements
+    uptr moduleId = renderedModules.size();
+
+    RenderModule(buffer, module, moduleId);
+    RenderMmaps(buffer, module, moduleId);
+
+    RenderedModule renderedModule{
+        internal_strdup(module.full_name()), module.base_address(), {}};
+
+    // kModuleUUIDSize is the size of curModule.uuid
+    CHECK_GE(kModuleUUIDSize, module.uuid_size());
+    internal_memcpy(renderedModule.uuid, module.uuid(), module.uuid_size());
+    renderedModules.push_back(renderedModule);
+  }
+}
+#endif  // !SANITIZER_FUCHSIA
+
 }  // namespace __sanitizer
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h
index 0f96ca78600a57e..83643504e1289ea 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h
@@ -35,6 +35,12 @@ constexpr const char *kFormatData = "{{{data:%p}}}";
 // One frame in a backtrace (printed on a line by itself).
 constexpr const char *kFormatFrame = "{{{bt:%u:%p}}}";
 
+// Module contextual element.
+constexpr const char *kFormatModule = "{{{module:%d:%s:elf:%s}}}";
+
+// mmap for a module segment.
+constexpr const char *kFormatMmap = "{{{mmap:%p:0x%x:load:%d:%s:0x%x}}}";
+
 // Dump trigger element.
 #define FORMAT_DUMPFILE "{{{dumpfile:%s:%s}}}"
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_fuchsia.cpp
index f6c49aa59d7b559..08b06c2faf30d81 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_fuchsia.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_fuchsia.cpp
@@ -72,6 +72,8 @@ StackTracePrinter *StackTracePrinter::NewStackTracePrinter() {
   return new (GetGlobalLowLevelAllocator()) MarkupStackTracePrinter();
 }
 
+void MarkupStackTracePrinter::RenderContext(InternalScopedString *) {}
+
 Symbolizer *Symbolizer::PlatformInit() {
   return new (symbolizer_allocator_) Symbolizer({});
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/73194


More information about the llvm-branch-commits mailing list