[llvm-commits] [compiler-rt] r164695 - /compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py
Alexander Potapenko
glider at google.com
Wed Sep 26 06:16:42 PDT 2012
Author: glider
Date: Wed Sep 26 08:16:42 2012
New Revision: 164695
URL: http://llvm.org/viewvc/llvm-project?rev=164695&view=rev
Log:
Fixed a number of gpylint warnings, added binary names filtering (useful for Chrome), minor fixes.
Modified:
compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py
Modified: compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py?rev=164695&r1=164694&r2=164695&view=diff
==============================================================================
--- compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py (original)
+++ compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py Wed Sep 26 08:16:42 2012
@@ -10,8 +10,8 @@
import bisect
import os
import re
-import sys
import subprocess
+import sys
llvm_symbolizer = None
symbolizers = {}
@@ -23,23 +23,25 @@
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
for path_to_cut in sys.argv[1:]:
- file_name = re.sub(".*" + path_to_cut, "", file_name)
- file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name)
- file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
+ file_name = re.sub('.*' + path_to_cut, '', file_name)
+ file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
+ file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
return file_name
class Symbolizer(object):
def __init__(self):
pass
+
def symbolize(self, addr, binary, offset):
- """
- Overrided in subclasses.
- Args:
+ """Symbolize the given address (pair of binary and offset).
+
+ Overriden in subclasses.
+ Args:
addr: virtual address of an instruction.
binary: path to executable/shared object containing this instruction.
offset: instruction offset in the @binary.
- Returns:
+ Returns:
list of strings (one string for each inlined frame) describing
the code locations for this instruction (that is, function name, file
name, line and column numbers).
@@ -52,51 +54,54 @@
super(LLVMSymbolizer, self).__init__()
self.symbolizer_path = symbolizer_path
self.pipe = self.open_llvm_symbolizer()
+
def open_llvm_symbolizer(self):
if not os.path.exists(self.symbolizer_path):
return None
cmd = [self.symbolizer_path,
- "--use-symbol-table=true",
- "--demangle=false",
- "--functions=true",
- "--inlining=true"]
+ '--use-symbol-table=true',
+ '--demangle=false',
+ '--functions=true',
+ '--inlining=true']
if DEBUG:
print ' '.join(cmd)
return subprocess.Popen(cmd, stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
def symbolize(self, addr, binary, offset):
- """Overrides Symbolizer.symbolize"""
+ """Overrides Symbolizer.symbolize."""
if not self.pipe:
return None
result = []
try:
- symbolizer_input = "%s %s" % (binary, offset)
+ symbolizer_input = '%s %s' % (binary, offset)
+ if DEBUG:
+ print symbolizer_input
print >> self.pipe.stdin, symbolizer_input
while True:
function_name = self.pipe.stdout.readline().rstrip()
- if (function_name == ""):
+ if not function_name:
break
file_name = self.pipe.stdout.readline().rstrip()
file_name = fix_filename(file_name)
- if (not function_name.startswith("??") and
- not file_name.startswith("??")):
+ if (not function_name.startswith('??') and
+ not file_name.startswith('??')):
# Append only valid frames.
- result.append("%s in %s %s" % (addr, function_name,
+ result.append('%s in %s %s' % (addr, function_name,
file_name))
except Exception:
result = []
- if len(result) == 0:
+ if not result:
result = None
return result
def LLVMSymbolizerFactory(system):
- if system == "Linux":
- symbolizer_path = os.getenv("LLVM_SYMBOLIZER_PATH")
+ if system == 'Linux':
+ symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
if not symbolizer_path:
# Assume llvm-symbolizer is in PATH.
- symbolizer_path = "llvm-symbolizer"
+ symbolizer_path = 'llvm-symbolizer'
return LLVMSymbolizer(symbolizer_path)
return None
@@ -106,42 +111,45 @@
super(Addr2LineSymbolizer, self).__init__()
self.binary = binary
self.pipe = self.open_addr2line()
+
def open_addr2line(self):
- cmd = ["addr2line", "-f", "-e", self.binary]
+ cmd = ['addr2line', '-f', '-e', self.binary]
if DEBUG:
print ' '.join(cmd)
return subprocess.Popen(cmd,
stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+
def symbolize(self, addr, binary, offset):
- """Overrides Symbolizer.symbolize"""
+ """Overrides Symbolizer.symbolize."""
if self.binary != binary:
return None
try:
print >> self.pipe.stdin, offset
function_name = self.pipe.stdout.readline().rstrip()
- file_name = self.pipe.stdout.readline().rstrip()
+ file_name = self.pipe.stdout.readline().rstrip()
except Exception:
- function_name = ""
- file_name = ""
+ function_name = ''
+ file_name = ''
file_name = fix_filename(file_name)
- return ["%s in %s %s" % (addr, function_name, file_name)]
+ return ['%s in %s %s' % (addr, function_name, file_name)]
class DarwinSymbolizer(Symbolizer):
def __init__(self, addr, binary):
super(DarwinSymbolizer, self).__init__()
self.binary = binary
- # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
+ # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
if len(addr) > 10:
- self.arch = "x86_64"
+ self.arch = 'x86_64'
else:
- self.arch = "i386"
+ self.arch = 'i386'
self.vmaddr = None
self.pipe = None
+
def get_binary_vmaddr(self):
- """
- Get the slide value to be added to the address.
- We're ooking for the following piece in otool -l output:
+ """Get the slide value to be added to the address.
+
+ We're looking for the following piece in otool -l output:
Load command 0
cmd LC_SEGMENT
cmdsize 736
@@ -150,13 +158,13 @@
"""
if self.vmaddr:
return self.vmaddr
- cmdline = ["otool", "-l", self.binary]
+ cmdline = ['otool', '-l', self.binary]
pipe = subprocess.Popen(cmdline,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
is_text = False
vmaddr = 0
- for line in pipe.stdout.readlines():
+ for line in pipe.stdout:
line = line.strip()
if line.startswith('segname'):
is_text = (line == 'segname __TEXT')
@@ -167,19 +175,22 @@
break
self.vmaddr = vmaddr
return self.vmaddr
+
def write_addr_to_pipe(self, offset):
slide = self.get_binary_vmaddr()
- print >> self.pipe.stdin, "0x%x" % (int(offset, 16) + slide)
+ print >> self.pipe.stdin, '0x%x' % (int(offset, 16) + slide)
+
def open_atos(self):
if DEBUG:
- print "atos -o %s -arch %s" % (self.binary, self.arch)
- cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
+ print 'atos -o %s -arch %s' % (self.binary, self.arch)
+ cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
self.pipe = subprocess.Popen(cmdline,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
+
def symbolize(self, addr, binary, offset):
- """Overrides Symbolizer.symbolize"""
+ """Overrides Symbolizer.symbolize."""
if self.binary != binary:
return None
self.open_atos()
@@ -190,14 +201,14 @@
# foo(type1, type2) (in object.name) (filename.cc:80)
match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
if DEBUG:
- print "atos_line: ", atos_line
+ print 'atos_line: ', atos_line
if match:
function_name = match.group(1)
- function_name = re.sub("\(.*?\)", "", function_name)
+ function_name = re.sub('\(.*?\)', '', function_name)
file_name = fix_filename(match.group(3))
- return ["%s in %s %s" % (addr, function_name, file_name)]
+ return ['%s in %s %s' % (addr, function_name, file_name)]
else:
- return ["%s in %s" % (addr, atos_line)]
+ return ['%s in %s' % (addr, atos_line)]
# Chain several symbolizers so that if one symbolizer fails, we fall back
@@ -206,20 +217,22 @@
def __init__(self, symbolizer_list):
super(ChainSymbolizer, self).__init__()
self.symbolizer_list = symbolizer_list
+
def symbolize(self, addr, binary, offset):
- """Overrides Symbolizer.symbolize"""
+ """Overrides Symbolizer.symbolize."""
for symbolizer in self.symbolizer_list:
if symbolizer:
result = symbolizer.symbolize(addr, binary, offset)
if result:
return result
return None
+
def append_symbolizer(self, symbolizer):
self.symbolizer_list.append(symbolizer)
-def BreakpadSymbolizerFactory(addr, binary):
- suffix = os.getenv("BREAKPAD_SUFFIX")
+def BreakpadSymbolizerFactory(binary):
+ suffix = os.getenv('BREAKPAD_SUFFIX')
if suffix:
filename = binary + suffix
if os.access(filename, os.F_OK):
@@ -249,6 +262,7 @@
self.debug_id = fragments[3]
self.binary = ' '.join(fragments[4:])
self.parse_lines(lines[1:])
+
def parse_lines(self, lines):
cur_function_addr = ''
for line in lines:
@@ -274,6 +288,7 @@
int(fragments[2]),
int(fragments[3]))
self.address_list.sort()
+
def get_sym_file_line(self, addr):
key = None
if addr in self.addresses.keys():
@@ -291,13 +306,14 @@
return symbol, filename, line_no
else:
return None
+
def symbolize(self, addr, binary, offset):
if self.binary != binary:
return None
res = self.get_sym_file_line(int(offset, 16))
if res:
function_name, file_name, line_no = res
- result = ["%s in %s %s:%d" % (
+ result = ['%s in %s %s:%d' % (
addr, function_name, file_name, line_no)]
print result
return result
@@ -306,19 +322,23 @@
class SymbolizationLoop(object):
- def __init__(self):
+ def __init__(self, binary_name_filter=None):
+ # Used by clients who may want to supply a different binary name.
+ # E.g. in Chrome several binaries may share a single .dSYM.
+ self.binary_name_filter = binary_name_filter
self.system = os.uname()[0]
if self.system in ['Linux', 'Darwin']:
self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
else:
- raise Exception("Unknown system")
+ raise Exception('Unknown system')
+
def symbolize_address(self, addr, binary, offset):
# Use the chain of symbolizers:
# Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
# (fall back to next symbolizer if the previous one fails).
- if not symbolizers.has_key(binary):
+ if not binary in symbolizers:
symbolizers[binary] = ChainSymbolizer(
- [BreakpadSymbolizerFactory(addr, binary), llvm_symbolizer])
+ [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
result = symbolizers[binary].symbolize(addr, binary, offset)
if result is None:
# Initialize system symbolizer only if other symbolizers failed.
@@ -326,33 +346,44 @@
SystemSymbolizerFactory(self.system, addr, binary))
result = symbolizers[binary].symbolize(addr, binary, offset)
# The system symbolizer must produce some result.
- assert(result)
+ assert result
return result
- def loop(self):
- frame_no = 0
+
+ def print_symbolized_lines(self, symbolized_lines):
+ if not symbolized_lines:
+ print self.current_line
+ else:
+ for symbolized_frame in symbolized_lines:
+ print ' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip()
+ self.frame_no += 1
+
+ def process_stdin(self):
+ self.frame_no = 0
for line in sys.stdin:
+ self.current_line = line.rstrip()
#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
stack_trace_line_format = (
- "^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)")
+ '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
match = re.match(stack_trace_line_format, line)
if not match:
- print line.rstrip()
+ print self.current_line
continue
if DEBUG:
print line
- prefix, frameno_str, addr, binary, offset = match.groups()
- if (frameno_str == "0"):
+ _, frameno_str, addr, binary, offset = match.groups()
+ if frameno_str == '0':
# Assume that frame #0 is the first frame of new stack trace.
- frame_no = 0
+ self.frame_no = 0
+ original_binary = binary
+ if self.binary_name_filter:
+ binary = self.binary_name_filter(binary)
symbolized_line = self.symbolize_address(addr, binary, offset)
if not symbolized_line:
- print line.rstrip()
- else:
- for symbolized_frame in symbolized_line:
- print " #" + str(frame_no) + " " + symbolized_frame.rstrip()
- frame_no += 1
+ if original_binary != binary:
+ symbolized_line = self.symbolize_address(addr, binary, offset)
+ self.print_symbolized_lines(symbolized_line)
if __name__ == '__main__':
loop = SymbolizationLoop()
- loop.loop()
+ loop.process_stdin()
More information about the llvm-commits mailing list