[compiler-rt] [HWASan] Fix symbol indexing (PR #135967)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 16 07:10:07 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Stefan Bossbaly (StefanBossbaly)

<details>
<summary>Changes</summary>

Previously we would add any ELF that contained a build id regardless
of whether the ELF contained symbols or not. This works for Android since
Soong will strip the symbols into a new directory. However, other
build systems, like BUCK, will write the stripped file in the same
directory as the unstripped file. This would cause the hwasan_symbolize
script to sometimes add the stripped ELF to its index and ignore the
symbolized ELF. The logic has now been changed to only add ELFs that
contain symbols to the index. If two symbolized ELFs are encountered
with the same build id, we now exit out with an error.

Fixes #<!-- -->135966

---
Full diff: https://github.com/llvm/llvm-project/pull/135967.diff


1 Files Affected:

- (modified) compiler-rt/lib/hwasan/scripts/hwasan_symbolize (+31-12) 


``````````diff
diff --git a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
index efca6b82809b9..a809491ef676d 100755
--- a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
+++ b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
@@ -42,6 +42,7 @@ Shdr_size = 64
 sh_type_offset = 4
 sh_offset_offset = 24
 sh_size_offset = 32
+SHT_SYMTAB = 2
 SHT_NOTE = 7
 
 Nhdr_size = 12
@@ -62,8 +63,13 @@ def handle_Nhdr(mv, sh_size):
     offset += Nhdr_size + align_up(n_namesz, 4) + align_up(n_descsz, 4)
   return None
 
-def handle_Shdr(mv):
+def unpack_sh_type(mv):
   sh_type, = struct.unpack_from('<I', buffer=mv, offset=sh_type_offset)
+  return sh_type
+
+def handle_Shdr(mv):
+  sh_type = unpack_sh_type(mv)
+  # Sanity check
   if sh_type != SHT_NOTE:
     return None, None
   sh_offset, = struct.unpack_from('<Q', buffer=mv, offset=sh_offset_offset)
@@ -76,19 +82,29 @@ def handle_elf(mv):
   # have to extend the parsing code.
   if mv[:6] != b'\x7fELF\x02\x01':
     return None
+  found_symbols = False
+  bid = None
   e_shnum, = struct.unpack_from('<H', buffer=mv, offset=e_shnum_offset)
   e_shoff, = struct.unpack_from('<Q', buffer=mv, offset=e_shoff_offset)
   for i in range(0, e_shnum):
     start = e_shoff + i * Shdr_size
-    sh_offset, sh_size = handle_Shdr(mv[start: start + Shdr_size])
-    if sh_offset is None:
-      continue
-    note_hdr = mv[sh_offset: sh_offset + sh_size]
-    result = handle_Nhdr(note_hdr, sh_size)
-    if result is not None:
-      return result
+    sh = mv[start: start + Shdr_size]
+    sh_type = unpack_sh_type(sh)
+
+    if sh_type == SHT_SYMTAB:
+      found_symbols = True
+    elif sh_type == SHT_NOTE:
+      sh_offset, sh_size = handle_Shdr(sh)
+      if sh_offset is None:
+        continue
+      note_hdr = mv[sh_offset: sh_offset + sh_size]
+      result = handle_Nhdr(note_hdr, sh_size)
+      if result is not None:
+        bid = result
+
+  return (found_symbols, bid)
 
-def get_buildid(filename):
+def read_elf(filename):
   with open(filename, "r") as fd:
     if os.fstat(fd.fileno()).st_size < Ehdr_size:
       return None
@@ -200,7 +216,7 @@ class Symbolizer:
       if os.path.exists(full_path):
         return full_path
     if name not in self.__warnings:
-      print("Could not find symbols for", name, file=sys.stderr)
+      print("Could not find symbols for {} (Build ID: {})".format(name, buildid), file=sys.stderr)
       self.__warnings.add(name)
     return None
 
@@ -268,13 +284,16 @@ class Symbolizer:
         for fn in fnames:
           filename = os.path.join(dname, fn)
           try:
-            bid = get_buildid(filename)
+            found_symbols, bid = read_elf(filename)
           except FileNotFoundError:
             continue
           except Exception as e:
             print("Failed to parse {}: {}".format(filename, e), file=sys.stderr)
             continue
-          if bid is not None:
+          if found_symbols and bid is not None:
+            if bid in self.__index:
+              print("Duplicate build ID {} for {} and {}".format(bid, self.__index[bid], filename), file=sys.stderr)
+              sys.exit(1)
             self.__index[bid] = filename
 
   def symbolize_line(self, line):

``````````

</details>


https://github.com/llvm/llvm-project/pull/135967


More information about the llvm-commits mailing list