[compiler-rt] f3d31c7 - Revert "[HWASan] Clean up hwasan_symbolize."

Nico Weber via llvm-commits llvm-commits at lists.llvm.org
Fri May 6 06:53:54 PDT 2022


Author: Nico Weber
Date: 2022-05-06T09:53:45-04:00
New Revision: f3d31c7f8173624ed436a5754ffeab5952136058

URL: https://github.com/llvm/llvm-project/commit/f3d31c7f8173624ed436a5754ffeab5952136058
DIFF: https://github.com/llvm/llvm-project/commit/f3d31c7f8173624ed436a5754ffeab5952136058.diff

LOG: Revert "[HWASan] Clean up hwasan_symbolize."

This reverts commit 6ca1df61d29c1c46d8d6f51a1091a7651c8b1ab1.
This is a prerequisite for reverting commit 4af9392e13a212fe295dc.

Added: 
    

Modified: 
    compiler-rt/lib/hwasan/scripts/hwasan_symbolize

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
index 7f36c3983a5e4..3b457c202daf3 100755
--- a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
+++ b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
@@ -31,6 +31,9 @@ if sys.version_info.major < 3:
   import codecs
   sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
 
+last_access_address = None
+last_access_tag = None
+
 # Below, a parser for a subset of ELF. It only supports 64 bit, little-endian,
 # and only parses what is necessary to find the build ids. It uses a memoryview
 # into an mmap to avoid copying.
@@ -107,8 +110,6 @@ class Symbolizer:
     self.__index = {}
     self.__link_prefixes = []
     self.__html = False
-    self.__last_access_address = None
-    self.__last_access_tag = None
 
   def enable_html(self, enable):
     self.__html = enable
@@ -267,81 +268,147 @@ class Symbolizer:
           if bid is not None:
             self.__index[bid] = filename
 
-  def symbolize_line(self, line):
-    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
-    match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
-                    r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
-    if match:
-      frameno = match.group(2)
-      binary = match.group(5)
-      addr = int(match.group(6), 16)
-      buildid = match.group(7)
-
-      frames = list(self.iter_call_stack(binary, buildid, addr))
-
-      if len(frames) > 0:
-        self.print(
-          self.maybe_escape(
-            "%s#%s%s%s in " % (match.group(1), match.group(2), match.group(3),
-                              frames[0][0])
-          ) + self.maybe_linkify(frames[0][1]),
-          escape=False)
-        for i in range(1, len(frames)):
-          space1 = ' ' * match.end(1)
-          space2 = ' ' * (match.start(4) - match.end(1) - 2)
-          self.print(
-            self.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
-              + self.maybe_linkify(frames[i][1]), escape=False)
-      else:
-        self.print(line.rstrip())
+def symbolize_line(line, symbolizer_path):
+  #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
+  match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
+                   r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
+  if match:
+    frameno = match.group(2)
+    binary = match.group(5)
+    addr = int(match.group(6), 16)
+    buildid = match.group(7)
+
+    frames = list(symbolizer.iter_call_stack(binary, buildid, addr))
+
+    if len(frames) > 0:
+      symbolizer.print(
+        symbolizer.maybe_escape(
+          "%s#%s%s%s in " % (match.group(1), match.group(2), match.group(3),
+                            frames[0][0])
+        ) + symbolizer.maybe_linkify(frames[0][1]),
+        escape=False)
+      for i in range(1, len(frames)):
+        space1 = ' ' * match.end(1)
+        space2 = ' ' * (match.start(4) - match.end(1) - 2)
+        symbolizer.print(
+          symbolizer.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
+            + symbolizer.maybe_linkify(frames[i][1]), escape=False)
     else:
-      self.print(line.rstrip())
-
-  def save_access_address(self, line):
-    match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
-    if match:
-      self.__last_access_address = int(match.group(2), 16)
-    match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
-    if match:
-      self.__last_access_tag = int(match.group(2), 16)
-
-  def process_stack_history(self, line, ignore_tags=False):
-    if self.__last_access_address is None or self.__last_access_tag is None:
-      return
-    if re.match(r'Previously allocated frames:', line, re.UNICODE):
-      return True
-    pc_mask = (1 << 48) - 1
-    fp_mask = (1 << 20) - 1
-    # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
-    match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
-                    r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
-    if match:
-      record_addr = int(match.group(2), 16)
-      record = int(match.group(3), 16)
-      binary = match.group(4)
-      addr = int(match.group(5), 16)
-      buildid = match.group(6)
-      base_tag = (record_addr >> 3) & 0xFF
-      fp = (record >> 48) << 4
-      pc = record & pc_mask
-
-      for local in self.iter_locals(binary, addr, buildid):
-        frame_offset = local[3]
-        size = local[4]
-        if frame_offset is None or size is None:
-          continue
-        obj_offset = (self.__last_access_address - fp - frame_offset) & fp_mask
-        if obj_offset >= size:
-          continue
-        tag_offset = local[5]
-        if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != self.__last_access_tag):
-          continue
-        self.print('')
-        self.print('Potentially referenced stack object:')
-        self.print('  %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
-        self.print('  at %s' % (local[1],))
-      return True
-    return False
+      symbolizer.print(line.rstrip())
+  else:
+    symbolizer.print(line.rstrip())
+
+def save_access_address(line):
+  global last_access_address, last_access_tag
+  match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
+  if match:
+    last_access_address = int(match.group(2), 16)
+  match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
+  if match:
+    last_access_tag = int(match.group(2), 16)
+
+def process_stack_history(line, symbolizer, ignore_tags=False):
+  if last_access_address is None or last_access_tag is None:
+    return
+  if re.match(r'Previously allocated frames:', line, re.UNICODE):
+    return True
+  pc_mask = (1 << 48) - 1
+  fp_mask = (1 << 20) - 1
+  # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
+  match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
+                   r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
+  if match:
+    record_addr = int(match.group(2), 16)
+    record = int(match.group(3), 16)
+    binary = match.group(4)
+    addr = int(match.group(5), 16)
+    buildid = match.group(6)
+    base_tag = (record_addr >> 3) & 0xFF
+    fp = (record >> 48) << 4
+    pc = record & pc_mask
+
+    for local in symbolizer.iter_locals(binary, addr, buildid):
+      frame_offset = local[3]
+      size = local[4]
+      if frame_offset is None or size is None:
+        continue
+      obj_offset = (last_access_address - fp - frame_offset) & fp_mask
+      if obj_offset >= size:
+        continue
+      tag_offset = local[5]
+      if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
+        continue
+      symbolizer.print('')
+      symbolizer.print('Potentially referenced stack object:')
+      symbolizer.print('  %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
+      symbolizer.print('  at %s' % (local[1],))
+    return True
+  return False
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-d', action='store_true')
+parser.add_argument('-v', action='store_true')
+parser.add_argument('--ignore-tags', action='store_true')
+parser.add_argument('--symbols', action='append')
+parser.add_argument('--source', action='append')
+parser.add_argument('--index', action='store_true')
+parser.add_argument('--symbolizer')
+parser.add_argument('--linkify', type=str)
+parser.add_argument('--html', action='store_true')
+parser.add_argument('args', nargs=argparse.REMAINDER)
+args = parser.parse_args()
+
+# Unstripped binaries location.
+binary_prefixes = args.symbols or []
+if not binary_prefixes:
+  if 'ANDROID_PRODUCT_OUT' in os.environ:
+    product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
+    binary_prefixes.append(product_out)
+  binary_prefixes.append('/')
+
+for p in binary_prefixes:
+  if not os.path.isdir(p):
+    print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
+    sys.exit(1)
+
+# Source location.
+paths_to_cut = args.source or []
+if not paths_to_cut:
+  paths_to_cut.append(os.getcwd() + '/')
+  if 'ANDROID_BUILD_TOP' in os.environ:
+    paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
+
+# llvm-symbolizer binary.
+# 1. --symbolizer flag
+# 2. environment variable
+# 3. unsuffixed binary in the current directory
+# 4. if inside Android platform, prebuilt binary at a known path
+# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
+#    highest available version in $PATH
+symbolizer_path = args.symbolizer
+if not symbolizer_path:
+  if 'LLVM_SYMBOLIZER_PATH' in os.environ:
+    symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
+  elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
+    symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
+
+if not symbolizer_path:
+  s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
+  if os.path.exists(s):
+    symbolizer_path = s
+
+if not symbolizer_path:
+  if 'ANDROID_BUILD_TOP' in os.environ:
+    s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
+    if os.path.exists(s):
+      symbolizer_path = s
+
+if not symbolizer_path:
+  for path in os.environ["PATH"].split(os.pathsep):
+    p = os.path.join(path, 'llvm-symbolizer')
+    if os.path.exists(p):
+      symbolizer_path = p
+      break
 
 def extract_version(s):
   idx = s.rfind('-')
@@ -350,114 +417,44 @@ def extract_version(s):
   x = float(s[idx + 1:])
   return x
 
-def main():
-  parser = argparse.ArgumentParser()
-  parser.add_argument('-d', action='store_true')
-  parser.add_argument('-v', action='store_true')
-  parser.add_argument('--ignore-tags', action='store_true')
-  parser.add_argument('--symbols', action='append')
-  parser.add_argument('--source', action='append')
-  parser.add_argument('--index', action='store_true')
-  parser.add_argument('--symbolizer')
-  parser.add_argument('--linkify', type=str)
-  parser.add_argument('--html', action='store_true')
-  parser.add_argument('args', nargs=argparse.REMAINDER)
-  args = parser.parse_args()
-
-  # Unstripped binaries location.
-  binary_prefixes = args.symbols or []
-  if not binary_prefixes:
-    if 'ANDROID_PRODUCT_OUT' in os.environ:
-      product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
-      binary_prefixes.append(product_out)
-    binary_prefixes.append('/')
-
-  for p in binary_prefixes:
-    if not os.path.isdir(p):
-      print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
-      sys.exit(1)
-
-  # Source location.
-  paths_to_cut = args.source or []
-  if not paths_to_cut:
-    paths_to_cut.append(os.getcwd() + '/')
-    if 'ANDROID_BUILD_TOP' in os.environ:
-      paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
-
-  # llvm-symbolizer binary.
-  # 1. --symbolizer flag
-  # 2. environment variable
-  # 3. unsuffixed binary in the current directory
-  # 4. if inside Android platform, prebuilt binary at a known path
-  # 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
-  #    highest available version in $PATH
-  symbolizer_path = args.symbolizer
-  if not symbolizer_path:
-    if 'LLVM_SYMBOLIZER_PATH' in os.environ:
-      symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
-    elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
-      symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
-
-  if not symbolizer_path:
-    s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
-    if os.path.exists(s):
-      symbolizer_path = s
-
-  if not symbolizer_path:
-    if 'ANDROID_BUILD_TOP' in os.environ:
-      s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
-      if os.path.exists(s):
-        symbolizer_path = s
-
-  if not symbolizer_path:
-    for path in os.environ["PATH"].split(os.pathsep):
-      p = os.path.join(path, 'llvm-symbolizer')
-      if os.path.exists(p):
-        symbolizer_path = p
-        break
-
-  if not symbolizer_path:
-    for path in os.environ["PATH"].split(os.pathsep):
-      candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
-      if len(candidates) > 0:
-        candidates.sort(key = extract_version, reverse = True)
-        symbolizer_path = candidates[0]
-        break
-
-  if not os.path.exists(symbolizer_path):
-    print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
+if not symbolizer_path:
+  for path in os.environ["PATH"].split(os.pathsep):
+    candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
+    if len(candidates) > 0:
+      candidates.sort(key = extract_version, reverse = True)
+      symbolizer_path = candidates[0]
+      break
+
+if not os.path.exists(symbolizer_path):
+  print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
+  sys.exit(1)
+
+if args.v:
+  print("Looking for symbols in:")
+  for s in binary_prefixes:
+    print("  %s" % (s,))
+  print("Stripping source path prefixes:")
+  for s in paths_to_cut:
+    print("  %s" % (s,))
+  print("Using llvm-symbolizer binary in:\n  %s" % (symbolizer_path,))
+  print()
+
+symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
+symbolizer.enable_html(args.html)
+symbolizer.enable_logging(args.d)
+if args.index:
+  symbolizer.build_index()
+
+if args.linkify:
+  if not args.html:
+    print('Need --html to --linkify', file=sys.stderr)
     sys.exit(1)
-
-  if args.v:
-    print("Looking for symbols in:")
-    for s in binary_prefixes:
-      print("  %s" % (s,))
-    print("Stripping source path prefixes:")
-    for s in paths_to_cut:
-      print("  %s" % (s,))
-    print("Using llvm-symbolizer binary in:\n  %s" % (symbolizer_path,))
-    print()
-
-  symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
-  symbolizer.enable_html(args.html)
-  symbolizer.enable_logging(args.d)
-  if args.index:
-    symbolizer.build_index()
-
-  if args.linkify:
-    if not args.html:
-      print('Need --html to --linkify', file=sys.stderr)
-      sys.exit(1)
-    symbolizer.read_linkify(args.linkify)
-
-  for line in sys.stdin:
-    if sys.version_info.major < 3:
-      line = line.decode('utf-8')
-    symbolizer.save_access_address(line)
-    if symbolizer.process_stack_history(line, ignore_tags=args.ignore_tags):
-      continue
-    symbolizer.symbolize_line(line)
-
-
-if __name__ == '__main__':
-  main()
+  symbolizer.read_linkify(args.linkify)
+
+for line in sys.stdin:
+  if sys.version_info.major < 3:
+    line = line.decode('utf-8')
+  save_access_address(line)
+  if process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
+    continue
+  symbolize_line(line, symbolizer_path)


        


More information about the llvm-commits mailing list