[compiler-rt] [HWASan] Fix symbol indexing (PR #135967)

Stefan Bossbaly via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 17 08:05:00 PDT 2025


https://github.com/StefanBossbaly updated https://github.com/llvm/llvm-project/pull/135967

>From 4b8c3acfa1f272c34ce1bb9e52581edf1bb33b56 Mon Sep 17 00:00:00 2001
From: Stefan Bossbaly <sboss at meta.com>
Date: Wed, 16 Apr 2025 09:41:04 -0400
Subject: [PATCH 1/2] [HWASan] Fix symbol indexing

Previously we would add any ELF that contained a build id regardless
of whether the ELF contained symbols or not. This works for Android since
soong will strip the symbols into a new directory. However other
build systems, like BUCK, will write the stripped file in the same
directory as the unstripped file. This would cause the hwasan_symbolize
script to sometimes add the stripped ELF to its index and ignore the
symbolized ELF. The logic has now been changed to only add ELFs that
contain symbols to the index. If two symbolized ELFs are encountered
with the same build id, we now exit out with an error.

Fixes #135966
---
 .../lib/hwasan/scripts/hwasan_symbolize       | 43 +++++++++++++------
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
index efca6b82809b9..a809491ef676d 100755
--- a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
+++ b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
@@ -42,6 +42,7 @@ Shdr_size = 64
 sh_type_offset = 4
 sh_offset_offset = 24
 sh_size_offset = 32
+SHT_SYMTAB = 2
 SHT_NOTE = 7
 
 Nhdr_size = 12
@@ -62,8 +63,13 @@ def handle_Nhdr(mv, sh_size):
     offset += Nhdr_size + align_up(n_namesz, 4) + align_up(n_descsz, 4)
   return None
 
-def handle_Shdr(mv):
+def unpack_sh_type(mv):
   sh_type, = struct.unpack_from('<I', buffer=mv, offset=sh_type_offset)
+  return sh_type
+
+def handle_Shdr(mv):
+  sh_type = unpack_sh_type(mv)
+  # Sanity check
   if sh_type != SHT_NOTE:
     return None, None
   sh_offset, = struct.unpack_from('<Q', buffer=mv, offset=sh_offset_offset)
@@ -76,19 +82,29 @@ def handle_elf(mv):
   # have to extend the parsing code.
   if mv[:6] != b'\x7fELF\x02\x01':
     return None
+  found_symbols = False
+  bid = None
   e_shnum, = struct.unpack_from('<H', buffer=mv, offset=e_shnum_offset)
   e_shoff, = struct.unpack_from('<Q', buffer=mv, offset=e_shoff_offset)
   for i in range(0, e_shnum):
     start = e_shoff + i * Shdr_size
-    sh_offset, sh_size = handle_Shdr(mv[start: start + Shdr_size])
-    if sh_offset is None:
-      continue
-    note_hdr = mv[sh_offset: sh_offset + sh_size]
-    result = handle_Nhdr(note_hdr, sh_size)
-    if result is not None:
-      return result
+    sh = mv[start: start + Shdr_size]
+    sh_type = unpack_sh_type(sh)
+
+    if sh_type == SHT_SYMTAB:
+      found_symbols = True
+    elif sh_type == SHT_NOTE:
+      sh_offset, sh_size = handle_Shdr(sh)
+      if sh_offset is None:
+        continue
+      note_hdr = mv[sh_offset: sh_offset + sh_size]
+      result = handle_Nhdr(note_hdr, sh_size)
+      if result is not None:
+        bid = result
+
+  return (found_symbols, bid)
 
-def get_buildid(filename):
+def read_elf(filename):
   with open(filename, "r") as fd:
     if os.fstat(fd.fileno()).st_size < Ehdr_size:
       return None
@@ -200,7 +216,7 @@ class Symbolizer:
       if os.path.exists(full_path):
         return full_path
     if name not in self.__warnings:
-      print("Could not find symbols for", name, file=sys.stderr)
+      print("Could not find symbols for {} (Build ID: {})".format(name, buildid), file=sys.stderr)
       self.__warnings.add(name)
     return None
 
@@ -268,13 +284,16 @@ class Symbolizer:
         for fn in fnames:
           filename = os.path.join(dname, fn)
           try:
-            bid = get_buildid(filename)
+            found_symbols, bid = read_elf(filename)
           except FileNotFoundError:
             continue
           except Exception as e:
             print("Failed to parse {}: {}".format(filename, e), file=sys.stderr)
             continue
-          if bid is not None:
+          if found_symbols and bid is not None:
+            if bid in self.__index:
+              print("Duplicate build ID {} for {} and {}".format(bid, self.__index[bid], filename), file=sys.stderr)
+              sys.exit(1)
             self.__index[bid] = filename
 
   def symbolize_line(self, line):

>From a61dedb04f09e0e4dd98521195db79e70bdba545 Mon Sep 17 00:00:00 2001
From: Stefan Bossbaly <sboss at meta.com>
Date: Thu, 17 Apr 2025 11:04:27 -0400
Subject: [PATCH 2/2] fixup! [HWASan] Fix symbol indexing

---
 .../lib/hwasan/scripts/hwasan_symbolize       | 48 ++++++++++++++-----
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
index a809491ef676d..a5bcf8248a9ef 100755
--- a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
+++ b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
@@ -37,12 +37,12 @@ if sys.version_info.major < 3:
 Ehdr_size = 64
 e_shnum_offset = 60
 e_shoff_offset = 40
-
+e_shstrndx_offset = 62
 Shdr_size = 64
+sh_name_offset = 0
 sh_type_offset = 4
 sh_offset_offset = 24
 sh_size_offset = 32
-SHT_SYMTAB = 2
 SHT_NOTE = 7
 
 Nhdr_size = 12
@@ -63,18 +63,32 @@ def handle_Nhdr(mv, sh_size):
     offset += Nhdr_size + align_up(n_namesz, 4) + align_up(n_descsz, 4)
   return None
 
+def handle_shstrtab(mv, e_shoff):
+  e_shstrndx, = struct.unpack_from('<H', buffer=mv, offset=e_shstrndx_offset)
+  
+  start_shstrndx = e_shoff + e_shstrndx * Shdr_size
+  shstrndx_sh = mv[start_shstrndx: start_shstrndx + Shdr_size]
+  _, shstrndx_sh_offset, shstrndx_sh_size = handle_Shdr(shstrndx_sh)
+  return mv[shstrndx_sh_offset:shstrndx_sh_offset + shstrndx_sh_size]
+
+def shstrtab_sh_name(mv):
+  name = ""
+  for byte in mv:
+    char = chr(byte)
+    if char == '\x00':
+      break
+    name += char
+  return name
+
 def unpack_sh_type(mv):
   sh_type, = struct.unpack_from('<I', buffer=mv, offset=sh_type_offset)
   return sh_type
 
 def handle_Shdr(mv):
-  sh_type = unpack_sh_type(mv)
-  # Sanity check
-  if sh_type != SHT_NOTE:
-    return None, None
+  name_offset, = struct.unpack_from('<I', buffer=mv, offset=sh_name_offset)
   sh_offset, = struct.unpack_from('<Q', buffer=mv, offset=sh_offset_offset)
   sh_size, = struct.unpack_from('<Q', buffer=mv, offset=sh_size_offset)
-  return sh_offset, sh_size
+  return name_offset, sh_offset, sh_size
 
 def handle_elf(mv):
   # \x02 is ELFCLASS64, \x01 is ELFDATA2LSB. HWASan currently only works on
@@ -86,15 +100,20 @@ def handle_elf(mv):
   bid = None
   e_shnum, = struct.unpack_from('<H', buffer=mv, offset=e_shnum_offset)
   e_shoff, = struct.unpack_from('<Q', buffer=mv, offset=e_shoff_offset)
+
+  # Section where all the section header names are stored
+  shstr = handle_shstrtab(mv, e_shoff)
+
   for i in range(0, e_shnum):
     start = e_shoff + i * Shdr_size
     sh = mv[start: start + Shdr_size]
+    sh_name_offset, sh_offset, sh_size = handle_Shdr(sh)
+    sh_name = shstrtab_sh_name(shstr[sh_name_offset:])
     sh_type = unpack_sh_type(sh)
 
-    if sh_type == SHT_SYMTAB:
+    if sh_name == ".debug_info":
       found_symbols = True
-    elif sh_type == SHT_NOTE:
-      sh_offset, sh_size = handle_Shdr(sh)
+    if sh_type == SHT_NOTE:
       if sh_offset is None:
         continue
       note_hdr = mv[sh_offset: sh_offset + sh_size]
@@ -102,7 +121,10 @@ def handle_elf(mv):
       if result is not None:
         bid = result
 
-  return (found_symbols, bid)
+  if found_symbols:
+    return bid
+  else:
+    return None
 
 def read_elf(filename):
   with open(filename, "r") as fd:
@@ -284,13 +306,13 @@ class Symbolizer:
         for fn in fnames:
           filename = os.path.join(dname, fn)
           try:
-            found_symbols, bid = read_elf(filename)
+            bid = read_elf(filename)
           except FileNotFoundError:
             continue
           except Exception as e:
             print("Failed to parse {}: {}".format(filename, e), file=sys.stderr)
             continue
-          if found_symbols and bid is not None:
+          if bid is not None:
             if bid in self.__index:
               print("Duplicate build ID {} for {} and {}".format(bid, self.__index[bid], filename), file=sys.stderr)
               sys.exit(1)



More information about the llvm-commits mailing list