[compiler-rt] 5811f3a - [asan_symbolize] Fix bug handling C++ symbols when using Atos.

Dan Liew via llvm-commits llvm-commits at lists.llvm.org
Tue May 19 16:08:18 PDT 2020


Author: Dan Liew
Date: 2020-05-19T16:08:09-07:00
New Revision: 5811f3a9f8d300bfa764ffebedeb37fd13b73a4b

URL: https://github.com/llvm/llvm-project/commit/5811f3a9f8d300bfa764ffebedeb37fd13b73a4b
DIFF: https://github.com/llvm/llvm-project/commit/5811f3a9f8d300bfa764ffebedeb37fd13b73a4b.diff

LOG: [asan_symbolize] Fix bug handling C++ symbols when using Atos.

Summary:
The previous code tries to strip out parentheses and anything in between
them. I'm guessing the idea here was to try to drop any listed arguments
for the function being symbolized. Unfortunately this approach is broken
in several ways.

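* Templated functions may contain parentheses. The existing approach
messes up these names because its non-greedy match stops at the first
closing parenthesis, so nested parentheses leave stray fragments behind.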
* In C++, argument types are part of a function's signature for the
purposes of overloading, so removing them could be confusing.

Fix this simply by not trying to adjust the function name that comes
from `atos`.

A test case is included.

Without the change the test case produced output like:

```
WRITE of size 4 at 0x6060000001a0 thread T0
    #0 0x10b96614d in IntWrapper<void >::operator=> const&) asan-symbolize-templated-cxx.cpp:10
    #1 0x10b960b0e in void writeToA<IntWrapper<void > >>) asan-symbolize-templated-cxx.cpp:30
    #2 0x10b96bf27 in decltype>)>> >)) std::__1::__invoke<void >), IntWrapper<void > >>), IntWrapper<void >&&) type_traits:4425
    #3 0x10b96bdc1 in void std::__1::__invoke_void_return_wrapper<void>::__call<void >), IntWrapper<void > >>), IntWrapper<void >&&) __functional_base:348
    #4 0x10b96bd71 in std::__1::__function::__alloc_func<void >), std::__1::allocator<void >)>, void >)>::operator>&&) functional:1533
    #5 0x10b9684e2 in std::__1::__function::__func<void >), std::__1::allocator<void >)>, void >)>::operator>&&) functional:1707
    #6 0x10b96cd7b in std::__1::__function::__value_func<void >)>::operator>&&) const functional:1860
    #7 0x10b96cc17 in std::__1::function<void >)>::operator>) const functional:2419
    #8 0x10b960ca6 in Foo<void >), IntWrapper<void > >::doCall>) asan-symbolize-templated-cxx.cpp:44
    #9 0x10b96088b in main asan-symbolize-templated-cxx.cpp:54
    #10 0x7fff6ffdfcc8 in start (in libdyld.dylib) + 0
```

Note how the symbol names for the frames are messed up (e.g. #8, #1).

With the patch the output looks like:

```
WRITE of size 4 at 0x6060000001a0 thread T0
    #0 0x10005214d in IntWrapper<void (int)>::operator=(IntWrapper<void (int)> const&) asan-symbolize-templated-cxx.cpp:10
    #1 0x10004cb0e in void writeToA<IntWrapper<void (int)> >(IntWrapper<void (int)>) asan-symbolize-templated-cxx.cpp:30
    #2 0x100057f27 in decltype(std::__1::forward<void (*&)(IntWrapper<void (int)>)>(fp)(std::__1::forward<IntWrapper<void (int)> >(fp0))) std::__1::__invoke<void (*&)(IntWrapper<void (int)>), IntWrapper<void (int)> >(void (*&)(IntWrapper<void (int)>), IntWrapper<void (int)>&&) type_traits:4425
    #3 0x100057dc1 in void std::__1::__invoke_void_return_wrapper<void>::__call<void (*&)(IntWrapper<void (int)>), IntWrapper<void (int)> >(void (*&)(IntWrapper<void (int)>), IntWrapper<void (int)>&&) __functional_base:348
    #4 0x100057d71 in std::__1::__function::__alloc_func<void (*)(IntWrapper<void (int)>), std::__1::allocator<void (*)(IntWrapper<void (int)>)>, void (IntWrapper<void (int)>)>::operator()(IntWrapper<void (int)>&&) functional:1533
    #5 0x1000544e2 in std::__1::__function::__func<void (*)(IntWrapper<void (int)>), std::__1::allocator<void (*)(IntWrapper<void (int)>)>, void (IntWrapper<void (int)>)>::operator()(IntWrapper<void (int)>&&) functional:1707
    #6 0x100058d7b in std::__1::__function::__value_func<void (IntWrapper<void (int)>)>::operator()(IntWrapper<void (int)>&&) const functional:1860
    #7 0x100058c17 in std::__1::function<void (IntWrapper<void (int)>)>::operator()(IntWrapper<void (int)>) const functional:2419
    #8 0x10004cca6 in Foo<void (IntWrapper<void (int)>), IntWrapper<void (int)> >::doCall(IntWrapper<void (int)>) asan-symbolize-templated-cxx.cpp:44
    #9 0x10004c88b in main asan-symbolize-templated-cxx.cpp:54
    #10 0x7fff6ffdfcc8 in start (in libdyld.dylib) + 0
```

rdar://problem/58887175

Reviewers: kubamracek, yln

Subscribers: #sanitizers, llvm-commits

Tags: #sanitizers

Differential Revision: https://reviews.llvm.org/D79597

Added: 
    compiler-rt/test/asan/TestCases/Darwin/asan-symbolize-templated-cxx.cpp

Modified: 
    compiler-rt/lib/asan/scripts/asan_symbolize.py

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py
index a196c075b039..d99e3441e925 100755
--- a/compiler-rt/lib/asan/scripts/asan_symbolize.py
+++ b/compiler-rt/lib/asan/scripts/asan_symbolize.py
@@ -275,11 +275,14 @@ def symbolize(self, addr, binary, offset):
       atos_line = self.atos.readline()
     # A well-formed atos response looks like this:
     #   foo(type1, type2) (in object.name) (filename.cc:80)
+    # NOTE:
+    #   * For C functions atos omits parentheses and argument types.
+    #   * For C++ functions the function name (i.e., `foo` above) may contain
+    #     templates which may contain parentheses.
     match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
     logging.debug('atos_line: %s', atos_line)
     if match:
       function_name = match.group(1)
-      function_name = re.sub('\(.*?\)', '', function_name)
       file_name = fix_filename(match.group(3))
       return ['%s in %s %s' % (addr, function_name, file_name)]
     else:

diff  --git a/compiler-rt/test/asan/TestCases/Darwin/asan-symbolize-templated-cxx.cpp b/compiler-rt/test/asan/TestCases/Darwin/asan-symbolize-templated-cxx.cpp
new file mode 100644
index 000000000000..3d726a32b7ea
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/Darwin/asan-symbolize-templated-cxx.cpp
@@ -0,0 +1,62 @@
+// UNSUPPORTED: ios
+// RUN: %clangxx_asan -O0 -g %s -o %t.executable
+// RUN: %env_asan_opts="symbolize=0" not %run %t.executable > %t_no_module_map.log 2>&1
+// RUN: %asan_symbolize --force-system-symbolizer < %t_no_module_map.log 2>&1 | FileCheck %s
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <functional>
+
+// This test is deliberately convoluted so that there is a function call
+// in the stack trace that contains nested parentheses.
+
+template <class CallBackTy>
+class IntWrapper {
+  int value_;
+  std::function<CallBackTy> callback_;
+
+public:
+  IntWrapper(int value, std::function<CallBackTy> callback) : value_(value), callback_(callback) {}
+  int &operator=(const int &new_value) {
+    value_ = new_value;
+    callback_(value_);
+  }
+};
+
+using IntW = IntWrapper<void(int)>;
+IntW *a;
+
+template <class T>
+void writeToA(T new_value) {
+  // CHECK: heap-use-after-free
+  // NOTE: atos seems to emit the `void` return type here for some reason.
+  // CHECK: #{{[0-9]+}} 0x{{.+}} in {{(void +)?}}writeToA<IntWrapper<void{{ *}}(int)>{{ *}}>(IntWrapper<void{{ *}}(int)>) asan-symbolize-templated-cxx.cpp:[[@LINE+1]]
+  *a = new_value;
+}
+
+extern "C" void callback(int new_value) {
+  printf("new value is %d\n", new_value);
+}
+
+template <class T, class V>
+struct Foo {
+  std::function<T> call;
+  Foo(std::function<T> c) : call(c) {}
+  void doCall(V new_value) {
+    // CHECK: #{{[0-9]+}} 0x{{.+}} in Foo<void (IntWrapper<void{{ *}}(int)>),{{ *}}IntWrapper<void{{ *}}(int)>{{ *}}>::doCall(IntWrapper<void{{ *}}(int)>) asan-symbolize-templated-cxx.cpp:[[@LINE+1]]
+    call(new_value);
+  }
+};
+
+int main() {
+  a = new IntW(0, callback);
+  assert(a);
+  // Foo<void(IntWrapper<void(int)>)>
+  // This type is deliberately convoluted so that the demangled type contains nested parentheses.
+  // In particular trying to match parentheses using a least-greedy regex approach will fail.
+  Foo<void(IntW), IntW> foo(writeToA<IntW>);
+  delete a;
+  // CHECK: #{{[0-9]+}} 0x{{.+}} in main asan-symbolize-templated-cxx.cpp:[[@LINE+1]]
+  foo.doCall(IntW(5, callback)); // BOOM
+  return 0;
+}


        


More information about the llvm-commits mailing list