[compiler-rt] 9ef451d - [hwasan] Offline symbolization script.
Evgenii Stepanov via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 9 13:46:32 PST 2019
Author: Evgenii Stepanov
Date: 2019-12-09T13:46:11-08:00
New Revision: 9ef451d1fdaa9a1cadf1a99c3540253a0e9c118d
URL: https://github.com/llvm/llvm-project/commit/9ef451d1fdaa9a1cadf1a99c3540253a0e9c118d
DIFF: https://github.com/llvm/llvm-project/commit/9ef451d1fdaa9a1cadf1a99c3540253a0e9c118d.diff
LOG: [hwasan] Offline symbolization script.
Summary:
A script to symbolize hwasan reports after the fact using unstripped
binaries. Supports stack-based reports. Requires llvm-symbolizer
(addr2line is not an option).
Reviewers: pcc, hctim
Subscribers: mgorny, #sanitizers, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D71148
Added:
compiler-rt/lib/hwasan/scripts/CMakeLists.txt
compiler-rt/lib/hwasan/scripts/hwasan_symbolize
Modified:
compiler-rt/lib/hwasan/CMakeLists.txt
Removed:
################################################################################
diff --git a/compiler-rt/lib/hwasan/CMakeLists.txt b/compiler-rt/lib/hwasan/CMakeLists.txt
index 636392fa8ae2..03863e4be68d 100644
--- a/compiler-rt/lib/hwasan/CMakeLists.txt
+++ b/compiler-rt/lib/hwasan/CMakeLists.txt
@@ -181,6 +181,8 @@ endforeach()
add_compiler_rt_resource_file(hwasan_blacklist hwasan_blacklist.txt hwasan)
+add_subdirectory("scripts")
+
# if(COMPILER_RT_INCLUDE_TESTS)
# add_subdirectory(tests)
# endif()
diff --git a/compiler-rt/lib/hwasan/scripts/CMakeLists.txt b/compiler-rt/lib/hwasan/scripts/CMakeLists.txt
new file mode 100644
index 000000000000..68c8375c75ed
--- /dev/null
+++ b/compiler-rt/lib/hwasan/scripts/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_compiler_rt_script(hwasan_symbolize) # presumably registers the script with the compiler-rt build; the helper is defined outside this patch - TODO confirm what it installs
+add_dependencies(hwasan hwasan_symbolize) # make the hwasan target depend on the hwasan_symbolize target
diff --git a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
new file mode 100755
index 000000000000..f77e36fbd622
--- /dev/null
+++ b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
@@ -0,0 +1,282 @@
+#!/usr/bin/env python
+#===- lib/hwasan/scripts/hwasan_symbolize ----------------------------------===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===------------------------------------------------------------------------===#
+#
+# HWAddressSanitizer offline symbolization script.
+#
+#===------------------------------------------------------------------------===#
+import glob
+import os
+import re
+import sys
+import string
+import subprocess
+import argparse
+
+last_access_address = None  # address parsed from the latest "tag-mismatch on address" line; written by save_access_address()
+last_access_tag = None  # pointer tag parsed from the latest "tags: .../... (ptr/mem)" line; written by save_access_address()
+
+class Symbolizer:
+  """Client for a long-running llvm-symbolizer subprocess.
+
+  Requests are written to the subprocess one line at a time ("CODE ..." for
+  call stacks, "FRAME ..." for stack-frame locals) and the answer is read
+  back line by line until an empty line is seen.
+  """
+
+  def __init__(self, path, binary_prefixes, paths_to_cut):
+    """Store the configuration; the subprocess is only started on first use.
+
+    Args:
+      path: llvm-symbolizer executable to launch.
+      binary_prefixes: directories searched for the binaries named in a report.
+      paths_to_cut: literal source path prefixes stripped from file names.
+    """
+    self.__pipe = None
+    self.__path = path
+    self.__binary_prefixes = binary_prefixes
+    self.__paths_to_cut = paths_to_cut
+    self.__log = False
+
+  def enable_logging(self, enable):
+    """Turn echoing of the symbolizer traffic to stderr on or off."""
+    self.__log = enable
+
+  def __open_pipe(self):
+    """Start the symbolizer subprocess unless it is already running."""
+    if not self.__pipe:
+      self.__pipe = subprocess.Popen([self.__path, "-inlining", "-functions"],
+                                     stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+
+  # Derives from Exception so that it is a proper exception type rather than
+  # a raised old-style class.
+  class __EOF(Exception):
+    """Raised by __read() when the symbolizer returns an empty line."""
+
+  def __write(self, s):
+    """Send one request line to the symbolizer (echoed to stderr if logging)."""
+    print >>self.__pipe.stdin, s
+    if self.__log:
+      print >>sys.stderr, ("#>> |%s|" % (s,))
+
+  def __read(self):
+    """Return the next response line with trailing whitespace removed.
+
+    Raises:
+      Symbolizer.__EOF: the line was empty, which ends the current response
+        (readline() also returns '' once the pipe is closed).
+    """
+    s = self.__pipe.stdout.readline().rstrip()
+    if self.__log:
+      print >>sys.stderr, ("# << |%s|" % (s,))
+    if s == '':
+      raise Symbolizer.__EOF
+    return s
+
+  def __process_source_path(self, file_name):
+    """Shorten a "file:line" string reported by the symbolizer.
+
+    Configured source prefixes are removed, sanitizer runtime sources are
+    collapsed to "[hwasan_rtl]" / "[asan_rtl]", and "crtstuff.c:0" becomes
+    "???:0".
+    """
+    for path_to_cut in self.__paths_to_cut:
+      # The prefixes are plain paths, not patterns: escape them so that
+      # characters such as "+" or "(" cannot break or alter the regex.
+      file_name = re.sub(".*" + re.escape(path_to_cut), "", file_name)
+    # The extension dot is matched literally; .cc and .cpp are both accepted
+    # for runtime sources.
+    file_name = re.sub(r".*hwasan_[a-z_]*\.(cc|cpp|h):[0-9]*", "[hwasan_rtl]", file_name)
+    file_name = re.sub(r".*asan_[a-z_]*\.(cc|cpp|h):[0-9]*", "[asan_rtl]", file_name)
+    file_name = re.sub(r".*crtstuff\.c:0", "???:0", file_name)
+    return file_name
+
+  def __process_binary_name(self, name):
+    """Map a binary path from the report to an existing file under a prefix.
+
+    Returns the first existing candidate, or None after printing a message
+    to stderr when nothing is found.
+    """
+    # os.path.join() would discard the prefix if the name stayed absolute.
+    if name.startswith('/'):
+      name = name[1:]
+    # First the full relative path under every prefix, then, as the last
+    # resort, just the file name under every prefix.
+    for candidate in (name, os.path.basename(name)):
+      for prefix in self.__binary_prefixes:
+        full_path = os.path.join(prefix, candidate)
+        if os.path.exists(full_path):
+          return full_path
+    print >>sys.stderr, "Could not find symbols for", name
+    return None
+
+  def iter_locals(self, binary, addr):
+    """Yield the local variables reported for the frame containing `addr`.
+
+    Each item is (function_name, file_line, local_name, offset, size,
+    tag_offset); a number is None where the symbolizer printed "??".
+    Nothing is yielded when no file for `binary` is found.
+    """
+    self.__open_pipe()
+    binary = self.__process_binary_name(binary)
+    if not binary:
+      return
+    self.__write("FRAME %s %s" % (binary, addr))
+    try:
+      while True:
+        function_name = self.__read()
+        local_name = self.__read()
+        file_line = self.__read()
+        extra = self.__read().split()
+
+        file_line = self.__process_source_path(file_line)
+        # Treat missing trailing fields like the "??" placeholder instead of
+        # failing with IndexError on a short line.
+        extra += ['??'] * (3 - len(extra))
+        offset, size, tag_offset = [
+            None if field == '??' else int(field) for field in extra[:3]]
+        yield (function_name, file_line, local_name, offset, size, tag_offset)
+    except Symbolizer.__EOF:
+      pass
+
+  def iter_call_stack(self, binary, addr):
+    """Yield (function_name, file_line) pairs reported for `addr`.
+
+    Several pairs are yielded when the symbolizer reports more than one
+    frame for the address. Nothing is yielded when no file for `binary` is
+    found.
+    """
+    self.__open_pipe()
+    binary = self.__process_binary_name(binary)
+    if not binary:
+      return
+    self.__write("CODE %s %s" % (binary, addr))
+    try:
+      while True:
+        function_name = self.__read()
+        file_line = self.__read()
+        file_line = self.__process_source_path(file_line)
+        yield (function_name, file_line)
+    except Symbolizer.__EOF:
+      pass
+
+def symbolize_line(line, symbolizer_path):
+  """Print `line`, expanding a raw stack frame into symbolized frames.
+
+  A frame line looks like "#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)". When
+  the symbolizer returns frames for it, the first one replaces the address
+  and any further ones are printed below it behind a "->" marker. Every
+  other line is printed unchanged (right-stripped, UTF-8 encoded).
+
+  Args:
+    line: one decoded line of the report.
+    symbolizer_path: not used by this function; the lookup goes through the
+      module-level `symbolizer` object.
+  """
+  frame_pattern = r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)'
+  match = re.match(frame_pattern, line, re.UNICODE)
+
+  frames = []
+  if match:
+    binary = match.group(5)
+    addr = int(match.group(6), 16)
+    frames = list(symbolizer.iter_call_stack(binary, addr))
+
+  # Not a frame line, or nothing could be symbolized: echo the input.
+  if not frames:
+    print line.rstrip().encode('utf-8')
+    return
+
+  prefix, frameno, gap = [match.group(i).encode('utf-8') for i in (1, 2, 3)]
+  print "%s#%s%s%s in %s" % (prefix, frameno, gap, frames[0][0], frames[0][1])
+
+  # Additional frames are indented so that the function name starts in the
+  # column where the address of the original frame line started.
+  space1 = ' ' * match.end(1)
+  space2 = ' ' * (match.start(4) - match.end(1) - 2)
+  for frame in frames[1:]:
+    print "%s->%s%s in %s" % (space1, space2, frame[0], frame[1])
+
+def save_access_address(line):
+  """Remember the faulting address and pointer tag announced by `line`.
+
+  Updates the module-level `last_access_address` when the line contains
+  "HWAddressSanitizer: tag-mismatch on address 0x...", and the module-level
+  `last_access_tag` when it contains "... of size N at 0x... tags: PP/MM
+  (ptr/mem)" (the PP part is stored). Lines matching neither leave both
+  values untouched.
+  """
+  global last_access_address, last_access_tag
+
+  address_match = re.match(
+      r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ',
+      line, re.UNICODE)
+  if address_match is not None:
+    last_access_address = int(address_match.group(2), 16)
+
+  tag_match = re.match(
+      r'^(.*?) of size [0-9]+ at 0x[0-9a-f]+ tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)',
+      line, re.UNICODE)
+  if tag_match is not None:
+    last_access_tag = int(tag_match.group(2), 16)
+
+def process_stack_history(line, symbolizer, ignore_tags=False):
+  """Handle one line of the stack history section of a report.
+
+  A "record_addr:... record:... (binary+offset)" line is decoded, and every
+  local variable of that frame which could contain the last access address
+  is printed as a "Potentially referenced stack object".
+
+  Args:
+    line: one decoded line of the report.
+    symbolizer: object whose iter_locals(binary, addr) yields
+      (function, file_line, name, offset, size, tag_offset) tuples.
+    ignore_tags: when true, do not require the variable's tag to match the
+      pointer tag of the access.
+
+  Returns:
+    True when the line was handled here (the "Previously allocated frames:"
+    header or a record line); False otherwise, including when no access
+    address/tag has been recorded yet.
+  """
+  if last_access_address is None or last_access_tag is None:
+    return False
+  if re.match(r'Previously allocated frames:', line, re.UNICODE):
+    return True
+  # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD)
+  match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
+  if not match:
+    return False
+
+  record_addr = int(match.group(2), 16)
+  record = int(match.group(3), 16)
+  binary = match.group(4)
+  addr = int(match.group(5), 16)
+  base_tag = (record_addr >> 3) & 0xFF
+  # The bits of the record above bit 48, scaled by 16.
+  fp = (record >> 48) << 4
+  # Offsets into the frame are compared modulo 2**20.
+  fp_mask = (1 << 20) - 1
+
+  for local in symbolizer.iter_locals(binary, addr):
+    frame_offset = local[3]
+    size = local[4]
+    if frame_offset is None or size is None:
+      continue
+    obj_offset = (last_access_address - fp - frame_offset) & fp_mask
+    if obj_offset >= size:
+      continue
+    tag_offset = local[5]
+    if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
+      continue
+    print ''
+    print 'Potentially referenced stack object:'
+    print ' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0])
+    print ' at %s' % (local[1],)
+  return True
+
+# Command line: boolean switches, repeatable options, the symbolizer
+# override, and finally everything that is left over.
+parser = argparse.ArgumentParser()
+for switch in ('-d', '-v', '--ignore-tags'):
+  parser.add_argument(switch, action='store_true')
+for repeatable in ('--symbols', '--source'):
+  parser.add_argument(repeatable, action='append')
+parser.add_argument('--symbolizer')
+parser.add_argument('args', nargs=argparse.REMAINDER)
+args = parser.parse_args()
+
+# Unstripped binaries location: every --symbols value, or $ANDROID_PRODUCT_OUT/symbols when none was given.
+binary_prefixes = args.symbols or []
+if not binary_prefixes:
+ if 'ANDROID_PRODUCT_OUT' in os.environ:
+ product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
+ binary_prefixes.append(product_out)
+
+for p in binary_prefixes:  # refuse to start when any symbols directory is unusable
+ if not os.path.isdir(p):
+ print >>sys.stderr, "Symbols path does not exist or is not a directory:", p
+ sys.exit(1)  # exit status 1 after reporting the bad directory
+
+# Source location: path prefixes that are stripped from reported file names.
+paths_to_cut = args.source or []
+if not paths_to_cut:  # no --source given: fall back to defaults
+ paths_to_cut.append(os.getcwd() + '/')
+ if 'ANDROID_BUILD_TOP' in os.environ:  # NOTE(review): indentation is lost in this listing; presumably nested under the check above so it is only a default - confirm
+ paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
+
+# Locate the llvm-symbolizer binary. The first source that yields a value
+# wins:
+# 1. --symbolizer flag
+# 2. environment variable (LLVM_SYMBOLIZER_PATH, then HWASAN_SYMBOLIZER_PATH)
+# 3. unsuffixed binary in the directory containing this script
+# 4. if inside Android platform, prebuilt binary at a known path
+# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
+# highest available version in $PATH
+symbolizer_path = args.symbolizer
+
+if not symbolizer_path:
+  # Only the first variable that is present is used, even if it is empty.
+  for env_name in ('LLVM_SYMBOLIZER_PATH', 'HWASAN_SYMBOLIZER_PATH'):
+    if env_name in os.environ:
+      symbolizer_path = os.environ[env_name]
+      break
+
+if not symbolizer_path:
+  s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
+  if os.path.exists(s):
+    symbolizer_path = s
+
+if not symbolizer_path and 'ANDROID_BUILD_TOP' in os.environ:
+  s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
+  if os.path.exists(s):
+    symbolizer_path = s
+
+if not symbolizer_path:
+  for search_dir in os.environ["PATH"].split(os.pathsep):
+    candidate = os.path.join(search_dir, 'llvm-symbolizer')
+    if os.path.exists(candidate):
+      symbolizer_path = candidate
+      break
+
+def extract_version(s):
+  """Return a numeric sort key for a "llvm-symbolizer-<version>" path.
+
+  The text after the last '-' is the version. A plain number ("10", "3.9")
+  is returned as a float. For anything else the leading "major[.minor]"
+  part is used ("9.0.1" gives 9.0), and a suffix without a leading number
+  ("git", "") gives 0 so that one oddly named binary cannot abort the
+  lookup with ValueError.
+
+  Args:
+    s: candidate path or file name.
+
+  Returns:
+    The version as a number; 0 when `s` contains no '-' or no usable version.
+  """
+  idx = s.rfind('-')
+  if idx == -1:
+    return 0
+  suffix = s[idx + 1:]
+  try:
+    return float(suffix)
+  except ValueError:
+    leading = re.match(r'[0-9]+(?:\.[0-9]+)?', suffix)
+    return float(leading.group(0)) if leading else 0
+
+# Still nothing: take the highest "llvm-symbolizer-$VER" from the first
+# $PATH entry that contains any versioned binary.
+if not symbolizer_path:
+  for path in os.environ["PATH"].split(os.pathsep):
+    candidates = sorted(glob.glob(os.path.join(path, 'llvm-symbolizer-*')),
+                        key=extract_version, reverse=True)
+    if candidates:
+      symbolizer_path = candidates[0]
+      break
+
+# Every lookup above may have come up empty. Report that explicitly:
+# os.path.exists(None) would raise TypeError instead of printing a message.
+if not symbolizer_path:
+  print >>sys.stderr, "Could not find llvm-symbolizer; use --symbolizer or set LLVM_SYMBOLIZER_PATH"
+  sys.exit(1)
+if not os.path.exists(symbolizer_path):
+  print >>sys.stderr, "Symbolizer path does not exist:", symbolizer_path
+  sys.exit(1)
+
+if args.v:  # -v: print the resolved configuration before processing the report
+ print "Looking for symbols in:"
+ for s in binary_prefixes:  # one line per symbols directory
+ print " %s" % (s,)
+ print "Stripping source path prefixes:"
+ for s in paths_to_cut:  # one line per stripped source prefix
+ print " %s" % (s,)
+ print "Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,)
+ print  # empty line; NOTE(review): indentation is lost in this listing, presumably still inside the -v branch - confirm
+
+# Run the report from stdin through the symbolizer, one line at a time.
+symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
+symbolizer.enable_logging(args.d)
+
+for raw_line in sys.stdin:
+  line = raw_line.decode('utf-8')
+  # Track the faulting address/tag first; the stack history step needs them.
+  save_access_address(line)
+  consumed = process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags)
+  if not consumed:
+    symbolize_line(line, symbolizer_path)
More information about the llvm-commits
mailing list