[compiler-rt] 6ca1df6 - [HWASan] Clean up hwasan_symbolize.

Florian Mayer via llvm-commits llvm-commits at lists.llvm.org
Thu May 5 16:41:21 PDT 2022


Author: Florian Mayer
Date: 2022-05-05T16:41:11-07:00
New Revision: 6ca1df61d29c1c46d8d6f51a1091a7651c8b1ab1

URL: https://github.com/llvm/llvm-project/commit/6ca1df61d29c1c46d8d6f51a1091a7651c8b1ab1
DIFF: https://github.com/llvm/llvm-project/commit/6ca1df61d29c1c46d8d6f51a1091a7651c8b1ab1.diff

LOG: [HWASan] Clean up hwasan_symbolize.

The globals are better expressed as members of the Symbolizer, and the
module-level functions operating on them should be methods instead.

Also use the standard idiom of wrapping the main code in
`if __name__ == '__main__'`.

Reviewed By: eugenis

Differential Revision: https://reviews.llvm.org/D125032

Added: 
    

Modified: 
    compiler-rt/lib/hwasan/scripts/hwasan_symbolize

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
index 3b457c202daf3..7f36c3983a5e4 100755
--- a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
+++ b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
@@ -31,9 +31,6 @@ if sys.version_info.major < 3:
   import codecs
   sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
 
-last_access_address = None
-last_access_tag = None
-
 # Below, a parser for a subset of ELF. It only supports 64 bit, little-endian,
 # and only parses what is necessary to find the build ids. It uses a memoryview
 # into an mmap to avoid copying.
@@ -110,6 +107,8 @@ class Symbolizer:
     self.__index = {}
     self.__link_prefixes = []
     self.__html = False
+    self.__last_access_address = None
+    self.__last_access_tag = None
 
   def enable_html(self, enable):
     self.__html = enable
@@ -268,147 +267,81 @@ class Symbolizer:
           if bid is not None:
             self.__index[bid] = filename
 
-def symbolize_line(line, symbolizer_path):
-  #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
-  match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
-                   r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
-  if match:
-    frameno = match.group(2)
-    binary = match.group(5)
-    addr = int(match.group(6), 16)
-    buildid = match.group(7)
-
-    frames = list(symbolizer.iter_call_stack(binary, buildid, addr))
-
-    if len(frames) > 0:
-      symbolizer.print(
-        symbolizer.maybe_escape(
-          "%s#%s%s%s in " % (match.group(1), match.group(2), match.group(3),
-                            frames[0][0])
-        ) + symbolizer.maybe_linkify(frames[0][1]),
-        escape=False)
-      for i in range(1, len(frames)):
-        space1 = ' ' * match.end(1)
-        space2 = ' ' * (match.start(4) - match.end(1) - 2)
-        symbolizer.print(
-          symbolizer.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
-            + symbolizer.maybe_linkify(frames[i][1]), escape=False)
+  def symbolize_line(self, line):
+    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
+    match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
+                    r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
+    if match:
+      frameno = match.group(2)
+      binary = match.group(5)
+      addr = int(match.group(6), 16)
+      buildid = match.group(7)
+
+      frames = list(self.iter_call_stack(binary, buildid, addr))
+
+      if len(frames) > 0:
+        self.print(
+          self.maybe_escape(
+            "%s#%s%s%s in " % (match.group(1), match.group(2), match.group(3),
+                              frames[0][0])
+          ) + self.maybe_linkify(frames[0][1]),
+          escape=False)
+        for i in range(1, len(frames)):
+          space1 = ' ' * match.end(1)
+          space2 = ' ' * (match.start(4) - match.end(1) - 2)
+          self.print(
+            self.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
+              + self.maybe_linkify(frames[i][1]), escape=False)
+      else:
+        self.print(line.rstrip())
     else:
-      symbolizer.print(line.rstrip())
-  else:
-    symbolizer.print(line.rstrip())
-
-def save_access_address(line):
-  global last_access_address, last_access_tag
-  match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
-  if match:
-    last_access_address = int(match.group(2), 16)
-  match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
-  if match:
-    last_access_tag = int(match.group(2), 16)
-
-def process_stack_history(line, symbolizer, ignore_tags=False):
-  if last_access_address is None or last_access_tag is None:
-    return
-  if re.match(r'Previously allocated frames:', line, re.UNICODE):
-    return True
-  pc_mask = (1 << 48) - 1
-  fp_mask = (1 << 20) - 1
-  # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
-  match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
-                   r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
-  if match:
-    record_addr = int(match.group(2), 16)
-    record = int(match.group(3), 16)
-    binary = match.group(4)
-    addr = int(match.group(5), 16)
-    buildid = match.group(6)
-    base_tag = (record_addr >> 3) & 0xFF
-    fp = (record >> 48) << 4
-    pc = record & pc_mask
-
-    for local in symbolizer.iter_locals(binary, addr, buildid):
-      frame_offset = local[3]
-      size = local[4]
-      if frame_offset is None or size is None:
-        continue
-      obj_offset = (last_access_address - fp - frame_offset) & fp_mask
-      if obj_offset >= size:
-        continue
-      tag_offset = local[5]
-      if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
-        continue
-      symbolizer.print('')
-      symbolizer.print('Potentially referenced stack object:')
-      symbolizer.print('  %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
-      symbolizer.print('  at %s' % (local[1],))
-    return True
-  return False
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-d', action='store_true')
-parser.add_argument('-v', action='store_true')
-parser.add_argument('--ignore-tags', action='store_true')
-parser.add_argument('--symbols', action='append')
-parser.add_argument('--source', action='append')
-parser.add_argument('--index', action='store_true')
-parser.add_argument('--symbolizer')
-parser.add_argument('--linkify', type=str)
-parser.add_argument('--html', action='store_true')
-parser.add_argument('args', nargs=argparse.REMAINDER)
-args = parser.parse_args()
-
-# Unstripped binaries location.
-binary_prefixes = args.symbols or []
-if not binary_prefixes:
-  if 'ANDROID_PRODUCT_OUT' in os.environ:
-    product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
-    binary_prefixes.append(product_out)
-  binary_prefixes.append('/')
-
-for p in binary_prefixes:
-  if not os.path.isdir(p):
-    print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
-    sys.exit(1)
-
-# Source location.
-paths_to_cut = args.source or []
-if not paths_to_cut:
-  paths_to_cut.append(os.getcwd() + '/')
-  if 'ANDROID_BUILD_TOP' in os.environ:
-    paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
-
-# llvm-symbolizer binary.
-# 1. --symbolizer flag
-# 2. environment variable
-# 3. unsuffixed binary in the current directory
-# 4. if inside Android platform, prebuilt binary at a known path
-# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
-#    highest available version in $PATH
-symbolizer_path = args.symbolizer
-if not symbolizer_path:
-  if 'LLVM_SYMBOLIZER_PATH' in os.environ:
-    symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
-  elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
-    symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
-
-if not symbolizer_path:
-  s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
-  if os.path.exists(s):
-    symbolizer_path = s
-
-if not symbolizer_path:
-  if 'ANDROID_BUILD_TOP' in os.environ:
-    s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
-    if os.path.exists(s):
-      symbolizer_path = s
-
-if not symbolizer_path:
-  for path in os.environ["PATH"].split(os.pathsep):
-    p = os.path.join(path, 'llvm-symbolizer')
-    if os.path.exists(p):
-      symbolizer_path = p
-      break
+      self.print(line.rstrip())
+
+  def save_access_address(self, line):
+    match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
+    if match:
+      self.__last_access_address = int(match.group(2), 16)
+    match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
+    if match:
+      self.__last_access_tag = int(match.group(2), 16)
+
+  def process_stack_history(self, line, ignore_tags=False):
+    if self.__last_access_address is None or self.__last_access_tag is None:
+      return
+    if re.match(r'Previously allocated frames:', line, re.UNICODE):
+      return True
+    pc_mask = (1 << 48) - 1
+    fp_mask = (1 << 20) - 1
+    # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
+    match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
+                    r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
+    if match:
+      record_addr = int(match.group(2), 16)
+      record = int(match.group(3), 16)
+      binary = match.group(4)
+      addr = int(match.group(5), 16)
+      buildid = match.group(6)
+      base_tag = (record_addr >> 3) & 0xFF
+      fp = (record >> 48) << 4
+      pc = record & pc_mask
+
+      for local in self.iter_locals(binary, addr, buildid):
+        frame_offset = local[3]
+        size = local[4]
+        if frame_offset is None or size is None:
+          continue
+        obj_offset = (self.__last_access_address - fp - frame_offset) & fp_mask
+        if obj_offset >= size:
+          continue
+        tag_offset = local[5]
+        if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != self.__last_access_tag):
+          continue
+        self.print('')
+        self.print('Potentially referenced stack object:')
+        self.print('  %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
+        self.print('  at %s' % (local[1],))
+      return True
+    return False
 
 def extract_version(s):
   idx = s.rfind('-')
@@ -417,44 +350,114 @@ def extract_version(s):
   x = float(s[idx + 1:])
   return x
 
-if not symbolizer_path:
-  for path in os.environ["PATH"].split(os.pathsep):
-    candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
-    if len(candidates) > 0:
-      candidates.sort(key = extract_version, reverse = True)
-      symbolizer_path = candidates[0]
-      break
-
-if not os.path.exists(symbolizer_path):
-  print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
-  sys.exit(1)
-
-if args.v:
-  print("Looking for symbols in:")
-  for s in binary_prefixes:
-    print("  %s" % (s,))
-  print("Stripping source path prefixes:")
-  for s in paths_to_cut:
-    print("  %s" % (s,))
-  print("Using llvm-symbolizer binary in:\n  %s" % (symbolizer_path,))
-  print()
-
-symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
-symbolizer.enable_html(args.html)
-symbolizer.enable_logging(args.d)
-if args.index:
-  symbolizer.build_index()
-
-if args.linkify:
-  if not args.html:
-    print('Need --html to --linkify', file=sys.stderr)
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('-d', action='store_true')
+  parser.add_argument('-v', action='store_true')
+  parser.add_argument('--ignore-tags', action='store_true')
+  parser.add_argument('--symbols', action='append')
+  parser.add_argument('--source', action='append')
+  parser.add_argument('--index', action='store_true')
+  parser.add_argument('--symbolizer')
+  parser.add_argument('--linkify', type=str)
+  parser.add_argument('--html', action='store_true')
+  parser.add_argument('args', nargs=argparse.REMAINDER)
+  args = parser.parse_args()
+
+  # Unstripped binaries location.
+  binary_prefixes = args.symbols or []
+  if not binary_prefixes:
+    if 'ANDROID_PRODUCT_OUT' in os.environ:
+      product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
+      binary_prefixes.append(product_out)
+    binary_prefixes.append('/')
+
+  for p in binary_prefixes:
+    if not os.path.isdir(p):
+      print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
+      sys.exit(1)
+
+  # Source location.
+  paths_to_cut = args.source or []
+  if not paths_to_cut:
+    paths_to_cut.append(os.getcwd() + '/')
+    if 'ANDROID_BUILD_TOP' in os.environ:
+      paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
+
+  # llvm-symbolizer binary.
+  # 1. --symbolizer flag
+  # 2. environment variable
+  # 3. unsuffixed binary in the current directory
+  # 4. if inside Android platform, prebuilt binary at a known path
+  # 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
+  #    highest available version in $PATH
+  symbolizer_path = args.symbolizer
+  if not symbolizer_path:
+    if 'LLVM_SYMBOLIZER_PATH' in os.environ:
+      symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
+    elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
+      symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
+
+  if not symbolizer_path:
+    s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
+    if os.path.exists(s):
+      symbolizer_path = s
+
+  if not symbolizer_path:
+    if 'ANDROID_BUILD_TOP' in os.environ:
+      s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
+      if os.path.exists(s):
+        symbolizer_path = s
+
+  if not symbolizer_path:
+    for path in os.environ["PATH"].split(os.pathsep):
+      p = os.path.join(path, 'llvm-symbolizer')
+      if os.path.exists(p):
+        symbolizer_path = p
+        break
+
+  if not symbolizer_path:
+    for path in os.environ["PATH"].split(os.pathsep):
+      candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
+      if len(candidates) > 0:
+        candidates.sort(key = extract_version, reverse = True)
+        symbolizer_path = candidates[0]
+        break
+
+  if not os.path.exists(symbolizer_path):
+    print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
     sys.exit(1)
-  symbolizer.read_linkify(args.linkify)
-
-for line in sys.stdin:
-  if sys.version_info.major < 3:
-    line = line.decode('utf-8')
-  save_access_address(line)
-  if process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
-    continue
-  symbolize_line(line, symbolizer_path)
+
+  if args.v:
+    print("Looking for symbols in:")
+    for s in binary_prefixes:
+      print("  %s" % (s,))
+    print("Stripping source path prefixes:")
+    for s in paths_to_cut:
+      print("  %s" % (s,))
+    print("Using llvm-symbolizer binary in:\n  %s" % (symbolizer_path,))
+    print()
+
+  symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
+  symbolizer.enable_html(args.html)
+  symbolizer.enable_logging(args.d)
+  if args.index:
+    symbolizer.build_index()
+
+  if args.linkify:
+    if not args.html:
+      print('Need --html to --linkify', file=sys.stderr)
+      sys.exit(1)
+    symbolizer.read_linkify(args.linkify)
+
+  for line in sys.stdin:
+    if sys.version_info.major < 3:
+      line = line.decode('utf-8')
+    symbolizer.save_access_address(line)
+    if symbolizer.process_stack_history(line, ignore_tags=args.ignore_tags):
+      continue
+    symbolizer.symbolize_line(line)
+
+
+if __name__ == '__main__':
+  main()


        


More information about the llvm-commits mailing list