[llvm-commits] [compiler-rt] r164695 - /compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py

Alexander Potapenko glider at google.com
Wed Sep 26 06:16:42 PDT 2012


Author: glider
Date: Wed Sep 26 08:16:42 2012
New Revision: 164695

URL: http://llvm.org/viewvc/llvm-project?rev=164695&view=rev
Log:
Fixed a number of gpylint warnings, added binary names filtering (useful for Chrome), minor fixes.

Modified:
    compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py

Modified: compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py?rev=164695&r1=164694&r2=164695&view=diff
==============================================================================
--- compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py (original)
+++ compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py Wed Sep 26 08:16:42 2012
@@ -10,8 +10,8 @@
 import bisect
 import os
 import re
-import sys
 import subprocess
+import sys
 
 llvm_symbolizer = None
 symbolizers = {}
@@ -23,23 +23,25 @@
 # FIXME: merge the code that calls fix_filename().
 def fix_filename(file_name):
   for path_to_cut in sys.argv[1:]:
-    file_name = re.sub(".*" + path_to_cut, "", file_name)
-  file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name)
-  file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
+    file_name = re.sub('.*' + path_to_cut, '', file_name)
+  file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
+  file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
   return file_name
 
 
 class Symbolizer(object):
   def __init__(self):
     pass
+
   def symbolize(self, addr, binary, offset):
-    """
-      Overrided in subclasses.
-      Args:
+    """Symbolize the given address (pair of binary and offset).
+
+    Overridden in subclasses.
+    Args:
         addr: virtual address of an instruction.
         binary: path to executable/shared object containing this instruction.
         offset: instruction offset in the @binary.
-      Returns:
+    Returns:
         list of strings (one string for each inlined frame) describing
         the code locations for this instruction (that is, function name, file
         name, line and column numbers).
@@ -52,51 +54,54 @@
     super(LLVMSymbolizer, self).__init__()
     self.symbolizer_path = symbolizer_path
     self.pipe = self.open_llvm_symbolizer()
+
   def open_llvm_symbolizer(self):
     if not os.path.exists(self.symbolizer_path):
       return None
     cmd = [self.symbolizer_path,
-           "--use-symbol-table=true",
-           "--demangle=false",
-           "--functions=true",
-           "--inlining=true"]
+           '--use-symbol-table=true',
+           '--demangle=false',
+           '--functions=true',
+           '--inlining=true']
     if DEBUG:
       print ' '.join(cmd)
     return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
 
   def symbolize(self, addr, binary, offset):
-    """Overrides Symbolizer.symbolize"""
+    """Overrides Symbolizer.symbolize."""
     if not self.pipe:
       return None
     result = []
     try:
-      symbolizer_input = "%s %s" % (binary, offset)
+      symbolizer_input = '%s %s' % (binary, offset)
+      if DEBUG:
+        print symbolizer_input
       print >> self.pipe.stdin, symbolizer_input
       while True:
         function_name = self.pipe.stdout.readline().rstrip()
-        if (function_name == ""):
+        if not function_name:
           break
         file_name = self.pipe.stdout.readline().rstrip()
         file_name = fix_filename(file_name)
-        if (not function_name.startswith("??") and
-            not file_name.startswith("??")):
+        if (not function_name.startswith('??') and
+            not file_name.startswith('??')):
           # Append only valid frames.
-          result.append("%s in %s %s" % (addr, function_name,
+          result.append('%s in %s %s' % (addr, function_name,
                                          file_name))
     except Exception:
       result = []
-    if len(result) == 0:
+    if not result:
       result = None
     return result
 
 
 def LLVMSymbolizerFactory(system):
-  if system == "Linux":
-    symbolizer_path = os.getenv("LLVM_SYMBOLIZER_PATH")
+  if system == 'Linux':
+    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
     if not symbolizer_path:
       # Assume llvm-symbolizer is in PATH.
-      symbolizer_path = "llvm-symbolizer"
+      symbolizer_path = 'llvm-symbolizer'
     return LLVMSymbolizer(symbolizer_path)
   return None
 
@@ -106,42 +111,45 @@
     super(Addr2LineSymbolizer, self).__init__()
     self.binary = binary
     self.pipe = self.open_addr2line()
+
   def open_addr2line(self):
-    cmd = ["addr2line", "-f", "-e", self.binary]
+    cmd = ['addr2line', '-f', '-e', self.binary]
     if DEBUG:
       print ' '.join(cmd)
     return subprocess.Popen(cmd,
                             stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+
   def symbolize(self, addr, binary, offset):
-    """Overrides Symbolizer.symbolize"""
+    """Overrides Symbolizer.symbolize."""
     if self.binary != binary:
       return None
     try:
       print >> self.pipe.stdin, offset
       function_name = self.pipe.stdout.readline().rstrip()
-      file_name     = self.pipe.stdout.readline().rstrip()
+      file_name = self.pipe.stdout.readline().rstrip()
     except Exception:
-      function_name = ""
-      file_name = ""
+      function_name = ''
+      file_name = ''
     file_name = fix_filename(file_name)
-    return ["%s in %s %s" % (addr, function_name, file_name)]
+    return ['%s in %s %s' % (addr, function_name, file_name)]
 
 
 class DarwinSymbolizer(Symbolizer):
   def __init__(self, addr, binary):
     super(DarwinSymbolizer, self).__init__()
     self.binary = binary
-    # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
+    # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
     if len(addr) > 10:
-      self.arch = "x86_64"
+      self.arch = 'x86_64'
     else:
-      self.arch = "i386"
+      self.arch = 'i386'
     self.vmaddr = None
     self.pipe = None
+
   def get_binary_vmaddr(self):
-    """
-    Get the slide value to be added to the address.
-    We're ooking for the following piece in otool -l output:
+    """Get the slide value to be added to the address.
+
+    We're looking for the following piece in otool -l output:
       Load command 0
       cmd LC_SEGMENT
       cmdsize 736
@@ -150,13 +158,13 @@
     """
     if self.vmaddr:
       return self.vmaddr
-    cmdline = ["otool", "-l", self.binary]
+    cmdline = ['otool', '-l', self.binary]
     pipe = subprocess.Popen(cmdline,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
     is_text = False
     vmaddr = 0
-    for line in pipe.stdout.readlines():
+    for line in pipe.stdout:
       line = line.strip()
       if line.startswith('segname'):
         is_text = (line == 'segname __TEXT')
@@ -167,19 +175,22 @@
         break
     self.vmaddr = vmaddr
     return self.vmaddr
+
   def write_addr_to_pipe(self, offset):
     slide = self.get_binary_vmaddr()
-    print >> self.pipe.stdin, "0x%x" % (int(offset, 16) + slide)
+    print >> self.pipe.stdin, '0x%x' % (int(offset, 16) + slide)
+
   def open_atos(self):
     if DEBUG:
-      print "atos -o %s -arch %s" % (self.binary, self.arch)
-    cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
+      print 'atos -o %s -arch %s' % (self.binary, self.arch)
+    cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
     self.pipe = subprocess.Popen(cmdline,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
+
   def symbolize(self, addr, binary, offset):
-    """Overrides Symbolizer.symbolize"""
+    """Overrides Symbolizer.symbolize."""
     if self.binary != binary:
       return None
     self.open_atos()
@@ -190,14 +201,14 @@
     #   foo(type1, type2) (in object.name) (filename.cc:80)
     match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
     if DEBUG:
-      print "atos_line: ", atos_line
+      print 'atos_line: ', atos_line
     if match:
       function_name = match.group(1)
-      function_name = re.sub("\(.*?\)", "", function_name)
+      function_name = re.sub('\(.*?\)', '', function_name)
       file_name = fix_filename(match.group(3))
-      return ["%s in %s %s" % (addr, function_name, file_name)]
+      return ['%s in %s %s' % (addr, function_name, file_name)]
     else:
-      return ["%s in %s" % (addr, atos_line)]
+      return ['%s in %s' % (addr, atos_line)]
 
 
 # Chain several symbolizers so that if one symbolizer fails, we fall back
@@ -206,20 +217,22 @@
   def __init__(self, symbolizer_list):
     super(ChainSymbolizer, self).__init__()
     self.symbolizer_list = symbolizer_list
+
   def symbolize(self, addr, binary, offset):
-    """Overrides Symbolizer.symbolize"""
+    """Overrides Symbolizer.symbolize."""
     for symbolizer in self.symbolizer_list:
       if symbolizer:
         result = symbolizer.symbolize(addr, binary, offset)
         if result:
           return result
     return None
+
   def append_symbolizer(self, symbolizer):
     self.symbolizer_list.append(symbolizer)
 
 
-def BreakpadSymbolizerFactory(addr, binary):
-  suffix = os.getenv("BREAKPAD_SUFFIX")
+def BreakpadSymbolizerFactory(binary):
+  suffix = os.getenv('BREAKPAD_SUFFIX')
   if suffix:
     filename = binary + suffix
     if os.access(filename, os.F_OK):
@@ -249,6 +262,7 @@
     self.debug_id = fragments[3]
     self.binary = ' '.join(fragments[4:])
     self.parse_lines(lines[1:])
+
   def parse_lines(self, lines):
     cur_function_addr = ''
     for line in lines:
@@ -274,6 +288,7 @@
                                 int(fragments[2]),
                                 int(fragments[3]))
     self.address_list.sort()
+
   def get_sym_file_line(self, addr):
     key = None
     if addr in self.addresses.keys():
@@ -291,13 +306,14 @@
       return symbol, filename, line_no
     else:
       return None
+
   def symbolize(self, addr, binary, offset):
     if self.binary != binary:
       return None
     res = self.get_sym_file_line(int(offset, 16))
     if res:
       function_name, file_name, line_no = res
-      result = ["%s in %s %s:%d" % (
+      result = ['%s in %s %s:%d' % (
           addr, function_name, file_name, line_no)]
       print result
       return result
@@ -306,19 +322,23 @@
 
 
 class SymbolizationLoop(object):
-  def __init__(self):
+  def __init__(self, binary_name_filter=None):
+    # Used by clients who may want to supply a different binary name.
+    # E.g. in Chrome several binaries may share a single .dSYM.
+    self.binary_name_filter = binary_name_filter
     self.system = os.uname()[0]
     if self.system in ['Linux', 'Darwin']:
       self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
     else:
-      raise Exception("Unknown system")
+      raise Exception('Unknown system')
+
   def symbolize_address(self, addr, binary, offset):
     # Use the chain of symbolizers:
     # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
     # (fall back to next symbolizer if the previous one fails).
-    if not symbolizers.has_key(binary):
+    if not binary in symbolizers:
       symbolizers[binary] = ChainSymbolizer(
-          [BreakpadSymbolizerFactory(addr, binary), llvm_symbolizer])
+          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
     result = symbolizers[binary].symbolize(addr, binary, offset)
     if result is None:
       # Initialize system symbolizer only if other symbolizers failed.
@@ -326,33 +346,44 @@
           SystemSymbolizerFactory(self.system, addr, binary))
       result = symbolizers[binary].symbolize(addr, binary, offset)
     # The system symbolizer must produce some result.
-    assert(result)
+    assert result
     return result
-  def loop(self):
-    frame_no = 0
+
+  def print_symbolized_lines(self, symbolized_lines):
+    if not symbolized_lines:
+      print self.current_line
+    else:
+      for symbolized_frame in symbolized_lines:
+        print '    #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip()
+        self.frame_no += 1
+
+  def process_stdin(self):
+    self.frame_no = 0
     for line in sys.stdin:
+      self.current_line = line.rstrip()
       #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
       stack_trace_line_format = (
-          "^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)")
+          '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
       match = re.match(stack_trace_line_format, line)
       if not match:
-        print line.rstrip()
+        print self.current_line
         continue
       if DEBUG:
         print line
-      prefix, frameno_str, addr, binary, offset = match.groups()
-      if (frameno_str == "0"):
+      _, frameno_str, addr, binary, offset = match.groups()
+      if frameno_str == '0':
         # Assume that frame #0 is the first frame of new stack trace.
-        frame_no = 0
+        self.frame_no = 0
+      original_binary = binary
+      if self.binary_name_filter:
+        binary = self.binary_name_filter(binary)
       symbolized_line = self.symbolize_address(addr, binary, offset)
       if not symbolized_line:
-        print line.rstrip()
-      else:
-        for symbolized_frame in symbolized_line:
-          print "    #" + str(frame_no) + " " + symbolized_frame.rstrip()
-          frame_no += 1
+        if original_binary != binary:
+          symbolized_line = self.symbolize_address(addr, binary, offset)
+      self.print_symbolized_lines(symbolized_line)
 
 
 if __name__ == '__main__':
   loop = SymbolizationLoop()
-  loop.loop()
+  loop.process_stdin()





More information about the llvm-commits mailing list