[compiler-rt] [sanitizer] Implement __sanitizer_symbolize_frame (PR #67491)

Vitaly Buka via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 26 14:28:00 PDT 2023


https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/67491

__sanitizer_symbolize_frame is used by HWASAN
for use-after-scope reports.


>From 2c34339b9be671a77aad3038a53661d2b392bbfb Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 26 Sep 2023 11:11:06 -0700
Subject: [PATCH] [sanitizer] Implement __sanitizer_symbolize_frame

__sanitizer_symbolize_frame is used by HWASAN
for use-after-scope reports.
---
 .../sanitizer_common_interface_posix.inc      |  1 +
 .../sanitizer_symbolizer_internal.h           |  9 +++++
 .../sanitizer_symbolizer_libcdep.cpp          |  4 +-
 .../sanitizer_symbolizer_posix_libcdep.cpp    | 13 +++++++
 .../symbolizer/sanitizer_symbolize.cpp        | 23 ++++++++++++
 .../symbolizer/scripts/build_symbolizer.sh    |  1 +
 .../symbolizer/scripts/global_symbols.txt     |  1 +
 .../TestCases/Linux/internal_symbolizer.cpp   | 37 +++++++++++++++++++
 8 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc
index a5259be9335aca8..6b567edc97a8570 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc
@@ -9,6 +9,7 @@
 //===----------------------------------------------------------------------===//
 INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_code)
 INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_data)
+INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_frame)
 INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_demangle)
 INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_flush)
 INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_set_demangle)
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
index 3ec4d80105a245a..2345aee98554134 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
@@ -160,6 +160,15 @@ void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res);
 // Used by LLVMSymbolizer and InternalSymbolizer.
 void ParseSymbolizeDataOutput(const char *str, DataInfo *info);
 
+// Parses repeated strings in the following format:
+//   <function_name>
+//   <var_name>
+//   <file_name>:<line_number>[:<column_number>]
+//   [<frame_offset>|??] [<size>|??] [<tag_offset>|??]
+// Used by LLVMSymbolizer and InternalSymbolizer.
+void ParseSymbolizeFrameOutput(const char *str,
+                               InternalMmapVector<LocalInfo> *locals);
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_SYMBOLIZER_INTERNAL_H
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
index d910aef3f741627..0d5af6ccdd2d858 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
@@ -385,8 +385,8 @@ void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
   str = ExtractUptr(str, "\n", &info->line);
 }
 
-static void ParseSymbolizeFrameOutput(const char *str,
-                                      InternalMmapVector<LocalInfo> *locals) {
+void ParseSymbolizeFrameOutput(const char *str,
+                               InternalMmapVector<LocalInfo> *locals) {
   if (internal_strncmp(str, "??", 2) == 0)
     return;
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
index 724ad4722909f1e..d92349c04fffabd 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
@@ -324,6 +324,9 @@ __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool
 __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
                            char *Buffer, int MaxLength);
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool
+__sanitizer_symbolize_frame(const char *ModuleName, u64 ModuleOffset,
+                            char *Buffer, int MaxLength);
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
 __sanitizer_symbolize_flush();
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool
@@ -366,6 +369,16 @@ class InternalSymbolizer final : public SymbolizerTool {
     return result;
   }
 
+  bool SymbolizeFrame(uptr addr, FrameInfo *info) override {
+    if (&__sanitizer_symbolize_frame == nullptr)
+      return false;
+    bool result = __sanitizer_symbolize_frame(info->module, info->module_offset,
+                                              buffer_, sizeof(buffer_));
+    if (result)
+      ParseSymbolizeFrameOutput(buffer_, &info->locals);
+    return result;
+  }
+
   void Flush() override {
     if (&__sanitizer_symbolize_flush)
       __sanitizer_symbolize_flush();
diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp b/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp
index ac27c0e25715f29..877191943fb5d49 100644
--- a/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp
+++ b/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp
@@ -115,6 +115,29 @@ bool __sanitizer_symbolize_data(const char *ModuleName, uint64_t ModuleOffset,
                                         Result.c_str()) < MaxLength;
 }
 
+bool __sanitizer_symbolize_frame(const char *ModuleName, uint64_t ModuleOffset,
+                                 char *Buffer, int MaxLength) {
+  std::string Result;
+  {
+    llvm::symbolize::PrinterConfig Config = getDefaultPrinterConfig();
+    llvm::raw_string_ostream OS(Result);
+    llvm::symbolize::Request Request{ModuleName, ModuleOffset};
+    auto Printer = std::make_unique<llvm::symbolize::LLVMPrinter>(
+        OS, symbolize_error_handler(OS), Config);
+
+    // TODO: it is necessary to set the proper SectionIndex here.
+    // object::SectionedAddress::UndefSection works only for absolute addresses.
+    auto ResOrErr = getDefaultSymbolizer()->symbolizeFrame(
+        ModuleName,
+        {ModuleOffset, llvm::object::SectionedAddress::UndefSection});
+    if (!ResOrErr)
+      return false;
+    Printer->print(Request, ResOrErr.get());
+  }
+  return __sanitizer::internal_snprintf(Buffer, MaxLength, "%s",
+                                        Result.c_str()) < MaxLength;
+}
+
 void __sanitizer_symbolize_flush() {
   if (Symbolizer)
     Symbolizer->flush();
diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
index 524ddca1b9f3e4f..6eb9fa7abb7e0cc 100755
--- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
+++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
@@ -140,6 +140,7 @@ $AR rc symbolizer.a sanitizer_symbolize.o sanitizer_wrappers.o
 
 SYMBOLIZER_API_LIST=__sanitizer_symbolize_code
 SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_data
+SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_frame
 SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_flush
 SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_demangle
 SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_set_demangle
diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
index 5cafae54660ef9a..0a4bc6989a0d72a 100644
--- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
+++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
@@ -59,6 +59,7 @@ __sanitizer_symbolize_code T
 __sanitizer_symbolize_data T
 __sanitizer_symbolize_demangle T
 __sanitizer_symbolize_flush T
+__sanitizer_symbolize_frame T
 __sanitizer_symbolize_set_demangle T
 __sanitizer_symbolize_set_inline_frames T
 __strdup U
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/internal_symbolizer.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/internal_symbolizer.cpp
index d4eb300cf3d900b..2421616676cface 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/Linux/internal_symbolizer.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/internal_symbolizer.cpp
@@ -17,6 +17,8 @@ bool __sanitizer_symbolize_code(const char *ModuleName, uint64_t ModuleOffset,
                                 bool SymbolizeInlineFrames);
 bool __sanitizer_symbolize_data(const char *ModuleName, uint64_t ModuleOffset,
                                 char *Buffer, int MaxLength);
+bool __sanitizer_symbolize_frame(const char *ModuleName, uint64_t ModuleOffset,
+                                 char *Buffer, int MaxLength);
 void __sanitizer_print_stack_trace();
 bool __sanitizer_symbolize_demangle(const char *Name, char *Buffer,
                                     int MaxLength);
@@ -132,6 +134,40 @@ void TestData() {
   // CHECK-NEXT: internal_symbolizer.cpp:[[# @LINE - 13]]
 }
 
+__attribute__((noinline)) std::string SymbolizeLocalVars(FrameInfo frame) {
+  auto modul_offset = GetModuleAndOffset(frame.address);
+  char buffer[1024] = {};
+  ScopedInSymbolizer in_symbolizer;
+  __sanitizer_symbolize_frame(modul_offset.first, modul_offset.second, buffer,
+                              std::size(buffer));
+  return buffer;
+}
+
+__attribute__((
+    noinline,
+    no_sanitize_address /* Asan merges allocas destroying variable DI*/)) void
+TestFrame() {
+  volatile int var = 1;
+  void *address = GetPC();
+  fprintf(stderr, "%s: %s\n", __FUNCTION__,
+          SymbolizeLocalVars({
+                                 0,
+                                 "",
+                                 "",
+                                 reinterpret_cast<void *>(
+                                     reinterpret_cast<uintptr_t>(address)),
+                             })
+              .c_str());
+  // CHECK-LABEL: TestFrame: TestFrame
+  // CHECK-NEXT: var
+  // CHECK-NEXT: internal_symbolizer.cpp:[[# @LINE - 13]]
+  // CHECK-NEXT: {{-?[0-9]+ +[0-9]+}}
+  // CHECK-NEXT: TestFrame
+  // CHECK-NEXT: address
+  // CHECK-NEXT: internal_symbolizer.cpp:[[# @LINE - 16]]
+  // CHECK-NEXT: {{-?[0-9]+ +[0-9]+}}
+}
+
 void TestDemangle() {
   char out[128];
   assert(!__sanitizer_symbolize_demangle("1A", out, sizeof(out)));
@@ -149,5 +185,6 @@ int main() {
   TestNoInline();
   TestLongFunctionNames();
   TestData();
+  TestFrame();
   TestDemangle();
 }



More information about the llvm-commits mailing list