[compiler-rt] 13e9c1d - Reland "[ASan] Use debuginfo for symbolization."

Mitch Phillips via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 16 13:59:12 PDT 2022


Author: Mitch Phillips
Date: 2022-06-16T13:58:55-07:00
New Revision: 13e9c1d18e2e97f643353d38b3b58858a5a823d9

URL: https://github.com/llvm/llvm-project/commit/13e9c1d18e2e97f643353d38b3b58858a5a823d9
DIFF: https://github.com/llvm/llvm-project/commit/13e9c1d18e2e97f643353d38b3b58858a5a823d9.diff

LOG: Reland "[ASan] Use debuginfo for symbolization."

This reverts commit 99796d06dbe11c8f81376ad1d42e7f17d2eff6ae.

Hint: Are you looking here because your manual invocation of something in
'check-asan' broke? You need an llvm-symbolizer built after D123538.

An upcoming patch will remove the internal metadata for global
variables. With D123534 and D123538, clang now emits DWARF debug info
for constant strings (the only global variable type it was missing), and
llvm-symbolizer is now able to symbolize all global variable addresses
(where previously it wouldn't give you the file:line information).

Move ASan's runtime over from the internal metadata to DWARF.

Differential Revision: https://reviews.llvm.org/D127552

Added: 
    compiler-rt/test/asan/TestCases/global-location-nodebug.cpp

Modified: 
    compiler-rt/lib/asan/asan_globals.cpp
    compiler-rt/lib/asan/asan_interface_internal.h
    compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
    compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp
    compiler-rt/test/asan/TestCases/Windows/global_const_string_oob.cpp
    compiler-rt/test/asan/TestCases/global-location.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp
index ecc2600f039a1..29eef6c68a516 100644
--- a/compiler-rt/lib/asan/asan_globals.cpp
+++ b/compiler-rt/lib/asan/asan_globals.cpp
@@ -86,10 +86,11 @@ static void ReportGlobal(const Global &g, const char *prefix) {
       "odr_indicator=%p\n",
       prefix, (void *)&g, (void *)g.beg, g.size, g.size_with_redzone, g.name,
       g.module_name, g.has_dynamic_init, (void *)g.odr_indicator);
-  if (g.location) {
-    Report("  location (%p): name=%s[%p], %d %d\n", (void *)g.location,
-           g.location->filename, (void *)g.location->filename,
-           g.location->line_no, g.location->column_no);
+
+  DataInfo info;
+  Symbolizer::GetOrInit()->SymbolizeData(g.beg, &info);
+  if (info.line != 0) {
+    Report("  location: name=%s, %d\n", info.file, info.line);
   }
 }
 
@@ -295,19 +296,15 @@ void PrintGlobalNameIfASCII(InternalScopedString *str, const __asan_global &g) {
               (char *)g.beg);
 }
 
-static const char *GlobalFilename(const __asan_global &g) {
-  const char *res = g.module_name;
-  // Prefer the filename from source location, if is available.
-  if (g.location) res = g.location->filename;
-  CHECK(res);
-  return res;
-}
-
 void PrintGlobalLocation(InternalScopedString *str, const __asan_global &g) {
-  str->append("%s", GlobalFilename(g));
-  if (!g.location) return;
-  if (g.location->line_no) str->append(":%d", g.location->line_no);
-  if (g.location->column_no) str->append(":%d", g.location->column_no);
+  DataInfo info;
+  Symbolizer::GetOrInit()->SymbolizeData(g.beg, &info);
+
+  if (info.line != 0) {
+    str->append("%s:%d", info.file, info.line);
+  } else {
+    str->append("%s", g.module_name);
+  }
 }
 
 } // namespace __asan

diff  --git a/compiler-rt/lib/asan/asan_interface_internal.h b/compiler-rt/lib/asan/asan_interface_internal.h
index 3e6e660288746..b0802a89ddbde 100644
--- a/compiler-rt/lib/asan/asan_interface_internal.h
+++ b/compiler-rt/lib/asan/asan_interface_internal.h
@@ -53,8 +53,9 @@ extern "C" {
     const char *module_name; // Module name as a C string. This pointer is a
                              // unique identifier of a module.
     uptr has_dynamic_init;   // Non-zero if the global has dynamic initializer.
-    __asan_global_source_location *location;  // Source location of a global,
-                                              // or NULL if it is unknown.
+    uptr windows_padding;    // TODO: Figure out how to remove this padding
+                             // that's simply here to make the MSVC incremental
+                             // linker happy...
     uptr odr_indicator;      // The address of the ODR indicator symbol.
   };
 

diff  --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
index 16cb65e1aac96..461fe96613688 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
@@ -363,14 +363,21 @@ void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
   }
 }
 
-// Parses a two-line string in the following format:
+// Parses a two- or three-line string in the following format:
 //   <symbol_name>
 //   <start_address> <size>
-// Used by LLVMSymbolizer and InternalSymbolizer.
+//   <filename>:<line>
+// Used by LLVMSymbolizer and InternalSymbolizer. LLVMSymbolizer added support
+// for symbolizing the third line in D123538, but we support the older two-line
+// information as well.
 void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
   str = ExtractToken(str, "\n", &info->name);
   str = ExtractUptr(str, " ", &info->start);
   str = ExtractUptr(str, "\n", &info->size);
+  // Note: If the third line isn't present, these calls will set info->file to
+  // an empty string and info->line to zero.
+  str = ExtractToken(str, ":", &info->file);
+  str = ExtractUptr(str, "\n", &info->line);
 }
 
 static void ParseSymbolizeFrameOutput(const char *str,

diff  --git a/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp b/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp
index 478568cd122c2..1c8ea99430df2 100644
--- a/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp
+++ b/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp
@@ -7,15 +7,15 @@
 // pointers. This setting is not on by default because it's too expensive.
 //
 // Different size: detect a bug if detect_odr_violation>=1
-// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %dynamiclib
-// RUN: %clangxx_asan %s %ld_flags_rpath_exe -o %t-ODR-EXE
+// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared %s -o %dynamiclib
+// RUN: %clangxx_asan -g %s %ld_flags_rpath_exe -o %t-ODR-EXE
 // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=1 not %run %t-ODR-EXE 2>&1 | FileCheck %s
 // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=2 not %run %t-ODR-EXE 2>&1 | FileCheck %s
 // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=0     %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED
 // RUN: %env_asan_opts=fast_unwind_on_malloc=0                        not %run %t-ODR-EXE 2>&1 | FileCheck %s
 //
 // Same size: report a bug only if detect_odr_violation>=2.
-// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %dynamiclib -DSZ=100
+// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared %s -o %dynamiclib -DSZ=100
 // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=1     %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED
 // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=2 not %run %t-ODR-EXE 2>&1 | FileCheck %s
 // RUN: %env_asan_opts=fast_unwind_on_malloc=0                        not %run %t-ODR-EXE 2>&1 | FileCheck %s
@@ -26,18 +26,18 @@
 // RUN: rm -f %t.supp
 //
 // Use private aliases for global variables without indicator symbol.
-// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared -mllvm -asan-use-private-alias %s -o %dynamiclib -DSZ=100
-// RUN: %clangxx_asan -mllvm -asan-use-private-alias %s %ld_flags_rpath_exe -o %t-ODR-EXE
+// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -mllvm -asan-use-private-alias %s -o %dynamiclib -DSZ=100
+// RUN: %clangxx_asan -g -mllvm -asan-use-private-alias %s %ld_flags_rpath_exe -o %t-ODR-EXE
 // RUN: %env_asan_opts=fast_unwind_on_malloc=0 %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED
 
 // Use private aliases for global variables: use indicator symbol to detect ODR violation.
-// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared -mllvm -asan-use-private-alias -mllvm -asan-use-odr-indicator  %s -o %dynamiclib -DSZ=100
-// RUN: %clangxx_asan -mllvm -asan-use-private-alias -mllvm -asan-use-odr-indicator %s %ld_flags_rpath_exe -o %t-ODR-EXE
+// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -mllvm -asan-use-private-alias -mllvm -asan-use-odr-indicator  %s -o %dynamiclib -DSZ=100
+// RUN: %clangxx_asan -g -mllvm -asan-use-private-alias -mllvm -asan-use-odr-indicator %s %ld_flags_rpath_exe -o %t-ODR-EXE
 // RUN: %env_asan_opts=fast_unwind_on_malloc=0 not %run %t-ODR-EXE 2>&1 | FileCheck %s
 
 // Same as above but with clang switches.
-// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared -fsanitize-address-use-odr-indicator %s -o %dynamiclib -DSZ=100
-// RUN: %clangxx_asan -fsanitize-address-use-odr-indicator %s %ld_flags_rpath_exe -o %t-ODR-EXE
+// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -fsanitize-address-use-odr-indicator %s -o %dynamiclib -DSZ=100
+// RUN: %clangxx_asan -g -fsanitize-address-use-odr-indicator %s %ld_flags_rpath_exe -o %t-ODR-EXE
 // RUN: %env_asan_opts=fast_unwind_on_malloc=0 not %run %t-ODR-EXE 2>&1 | FileCheck %s
 
 // GNU driver doesn't handle .so files properly.
@@ -55,7 +55,7 @@ namespace foo { char G[SZ]; }
 #include <stdio.h>
 namespace foo { char G[100]; }
 // CHECK: ERROR: AddressSanitizer: odr-violation
-// CHECK: size=100 'foo::G' {{.*}}odr-violation.cpp:[[@LINE-2]]:22
+// CHECK: size=100 'foo::G' {{.*}}odr-violation.cpp:[[@LINE-2]]
 // CHECK: size={{4|100}} 'foo::G'
 int main(int argc, char **argv) {
   printf("PASS: %p\n", &foo::G);

diff  --git a/compiler-rt/test/asan/TestCases/Windows/global_const_string_oob.cpp b/compiler-rt/test/asan/TestCases/Windows/global_const_string_oob.cpp
index 241d14f42a299..2c5c8644149e5 100644
--- a/compiler-rt/test/asan/TestCases/Windows/global_const_string_oob.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/global_const_string_oob.cpp
@@ -9,12 +9,12 @@ extern "C" const char *foo = "foobarspam";
 int main(void) {
   if (foo[16])
     printf("Boo\n");
-// CHECK-NOT: Boo
-// CHECK: AddressSanitizer: global-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
-// CHECK: READ of size 1 at [[ADDR]] thread T0
-// CHECK-NEXT:   {{#0 .* main .*global_const_string_oob.cpp:}}[[@LINE-5]]
-// CHECK: [[ADDR]] is located 5 bytes to the right of global variable [[STR:.*]] defined in {{'.*global_const_string_oob.cpp:7:.*' .*}} of size 11
-// CHECK:   [[STR]] is ascii string 'foobarspam'
+  // CHECK-NOT: Boo
+  // CHECK: AddressSanitizer: global-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+  // CHECK: READ of size 1 at [[ADDR]] thread T0
+  // CHECK-NEXT:   {{#0 .* main .*global_const_string_oob.cpp:}}[[@LINE-5]]
+  // CHECK: [[ADDR]] is located 5 bytes to the right of global variable [[STR:.*]] defined in {{'.*global_const_string_oob.cpp' .*}} of size 11
+  // CHECK:   [[STR]] is ascii string 'foobarspam'
   return 0;
 }
 

diff  --git a/compiler-rt/test/asan/TestCases/global-location-nodebug.cpp b/compiler-rt/test/asan/TestCases/global-location-nodebug.cpp
new file mode 100644
index 0000000000000..c69624a1791e3
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/global-location-nodebug.cpp
@@ -0,0 +1,16 @@
+/// Same as global-location.cpp, but without debuginfo. In a separate file to
+/// allow this test to also run on Windows (which can't be done for the
+/// debuginfo variant).
+
+// RUN: %clangxx_asan -O2 %S/global-location.cpp -o %t -Wl,-S
+// RUN: not %run %t g 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=GLOB-NO-G
+// RUN: not %run %t c 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CLASS_STATIC-NO-G
+// RUN: not %run %t f 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=FUNC_STATIC-NO-G
+// RUN: not %run %t l 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=LITERAL-NO-G
+
+// CHECK: AddressSanitizer: global-buffer-overflow
+// CLASS_STATIC-NO-G: 0x{{.*}} is located 4 bytes to the right of global variable 'C::array' defined in '{{.*}}global-location.cpp' {{.*}} of size 40
+// GLOB-NO-G: 0x{{.*}} is located 4 bytes to the right of global variable 'global' defined in '{{.*}}global-location.cpp' {{.*}} of size 40
+// FUNC_STATIC-NO-G: 0x{{.*}} is located 4 bytes to the right of global variable 'array' defined in '{{.*}}global-location.cpp' {{.*}} of size 40
+// LITERAL-NO-G: 0x{{.*}} is located 0 bytes to the right of global variable {{.*}} defined in '{{.*}}global-location.cpp' {{.*}} of size 11
+// CHECK: SUMMARY: AddressSanitizer: global-buffer-overflow

diff  --git a/compiler-rt/test/asan/TestCases/global-location.cpp b/compiler-rt/test/asan/TestCases/global-location.cpp
index 42711f97c21b9..3cf1791c800ce 100644
--- a/compiler-rt/test/asan/TestCases/global-location.cpp
+++ b/compiler-rt/test/asan/TestCases/global-location.cpp
@@ -1,21 +1,24 @@
-// RUN: %clangxx_asan -O2 %s -o %t
+// RUN: %clangxx_asan -g -O2 %s -o %t
 // RUN: not %run %t g 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=GLOB
 // RUN: not %run %t c 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CLASS_STATIC
 // RUN: not %run %t f 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=FUNC_STATIC
 // RUN: not %run %t l 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=LITERAL
 
+// COFF doesn't support debuginfo for globals. For the non-debuginfo tests, see global-location-nodebug.cpp.
+// XFAIL: windows-msvc
+
 // CHECK: AddressSanitizer: global-buffer-overflow
 
 #include <string.h>
 
 struct C {
   static int array[10];
+  // CLASS_STATIC:      0x{{.*}} is located 4 bytes to the right of global variable 'C::array' defined in '{{.*}}global-location.cpp:[[@LINE-1]]' {{.*}} of size 40
 };
 
 int global[10];
-// GLOB: 0x{{.*}} is located 4 bytes to the right of global variable 'global' defined in '{{.*}}global-location.cpp:[[@LINE-1]]:5' {{.*}} of size 40
+// GLOB:      0x{{.*}} is located 4 bytes to the right of global variable 'global' defined in '{{.*}}global-location.cpp:[[@LINE-1]]' {{.*}} of size 40
 int C::array[10];
-// CLASS_STATIC: 0x{{.*}} is located 4 bytes to the right of global variable 'C::array' defined in '{{.*}}global-location.cpp:[[@LINE-1]]:8' {{.*}} of size 40
 
 int main(int argc, char **argv) {
   int one = argc - 1;
@@ -24,12 +27,12 @@ int main(int argc, char **argv) {
   case 'c': return C::array[one * 11];
   case 'f':
     static int array[10];
-    // FUNC_STATIC: 0x{{.*}} is located 4 bytes to the right of global variable 'array' defined in '{{.*}}global-location.cpp:[[@LINE-1]]:16' {{.*}} of size 40
+    // FUNC_STATIC:      0x{{.*}} is located 4 bytes to the right of global variable 'array' defined in '{{.*}}global-location.cpp:[[@LINE-1]]' {{.*}} of size 40
     memset(array, 0, 10);
     return array[one * 11];
   case 'l':
     const char *str = "0123456789";
-    // LITERAL: 0x{{.*}} is located 0 bytes to the right of global variable {{.*}} defined in '{{.*}}global-location.cpp:[[@LINE-1]]:23' {{.*}} of size 11
+    // LITERAL:      0x{{.*}} is located 0 bytes to the right of global variable {{.*}} defined in '{{.*}}global-location.cpp:[[@LINE-1]]' {{.*}} of size 11
     return str[one * 11];
   }
   return 0;


        


More information about the llvm-commits mailing list