[lld] a2715f0 - [AArch64][llvm-objdump] Fix arm64_32 symbolization (#171164)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 17 04:17:42 PST 2025


Author: Ryan Mansfield
Date: 2025-12-17T13:17:39+01:00
New Revision: a2715f031f8fb5691182f8d334f92367a6a9e3cc

URL: https://github.com/llvm/llvm-project/commit/a2715f031f8fb5691182f8d334f92367a6a9e3cc
DIFF: https://github.com/llvm/llvm-project/commit/a2715f031f8fb5691182f8d334f92367a6a9e3cc.diff

LOG: [AArch64][llvm-objdump] Fix arm64_32 symbolization (#171164)

llvm-objdump was missing "literal pool symbol address" comments for
arm64_32 stub disassembly. Fixed by adding 32-bit instruction support
(LDRWui, ADDWri, LDRWl) to AArch64ExternalSymbolizer and aarch64_32
architecture checks to MachODump.cpp symbolization code.

Fixes #49288

Added: 
    llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/symbolized-stubs.exe.macho-arm64_32
    llvm/test/tools/llvm-objdump/MachO/AArch64/macho-symbolized-disassembly-arm64_32.test

Modified: 
    lld/test/MachO/arm64-stubs.s
    llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
    llvm/tools/llvm-objdump/MachODump.cpp

Removed: 
    lld/test/MachO/arm64-32-stubs.s


################################################################################
diff --git a/lld/test/MachO/arm64-32-stubs.s b/lld/test/MachO/arm64-32-stubs.s
deleted file mode 100644
index a5d48ff5baef3..0000000000000
--- a/lld/test/MachO/arm64-32-stubs.s
+++ /dev/null
@@ -1,60 +0,0 @@
-# REQUIRES: aarch64
-
-## FIXME: This test is very similar to arm64-stubs.s, but has been split into a
-## separate file because llvm-objdump doesn't correctly symbolize arm64_32. In
-## particular, the "literal pool symbol address" comments are missing (PR49944).
-
-# RUN: rm -rf %t; split-file %s %t
-# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/foo.s -o %t/foo.o
-# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/bar.s -o %t/bar.o
-# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/test.s -o %t/test.o
-# RUN: %lld-watchos -dylib -install_name @executable_path/libfoo.dylib %t/foo.o -o %t/libfoo.dylib
-# RUN: %lld-watchos -dylib -install_name @executable_path/libbar.dylib %t/bar.o -o %t/libbar.dylib
-# RUN: %lld-watchos -lSystem %t/libfoo.dylib %t/libbar.dylib %t/test.o -o %t/test -no_fixup_chains
-
-# RUN: llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --section="__TEXT,__stubs" --section="__TEXT,__stub_helper" %t/test | FileCheck %s
-
-# CHECK:       _main:
-# CHECK-NEXT:  bl 0x[[#%x,FOO:]] ; symbol stub for: _foo
-# CHECK-NEXT:  bl 0x[[#%x,BAR:]] ; symbol stub for: _bar
-# CHECK-NEXT:  ret
-
-# CHECK-LABEL: Contents of (__TEXT,__stubs) section
-# CHECK-NEXT:  [[#BAR]]: adrp x16
-# CHECK-NEXT:            ldr w16, [x16{{.*}}]
-# CHECK-NEXT:            br x16
-# CHECK-NEXT:  [[#FOO]]: adrp x16
-# CHECK-NEXT:            ldr w16, [x16{{.*}}]
-# CHECK-NEXT:            br x16
-
-# CHECK-LABEL: Contents of (__TEXT,__stub_helper) section
-# CHECK-NEXT:  [[#%x,HELPER_HEADER:]]: adrp x17
-# CHECK-NEXT:                          add x17, x17
-# CHECK-NEXT:                          stp x16, x17, [sp, #-16]!
-# CHECK-NEXT:                          adrp x16
-# CHECK-NEXT:                          ldr w16, [x16]
-# CHECK-NEXT:                          br x16
-# CHECK-NEXT:                          ldr w16, 0x[[#%x,BAR_BIND_OFF_ADDR:]]
-# CHECK-NEXT:                          b 0x[[#HELPER_HEADER]]
-# CHECK-NEXT:  [[#BAR_BIND_OFF_ADDR]]: udf #0
-# CHECK-NEXT:                          ldr w16, 0x[[#%x,FOO_BIND_OFF_ADDR:]]
-# CHECK-NEXT:                          b 0x[[#HELPER_HEADER]]
-# CHECK-NEXT:  [[#FOO_BIND_OFF_ADDR]]: udf #11
-
-#--- foo.s
-.globl _foo
-_foo:
-
-#--- bar.s
-.globl _bar
-_bar:
-
-#--- test.s
-.text
-.globl _main
-
-.p2align 2
-_main:
-  bl _foo
-  bl _bar
-  ret

diff --git a/lld/test/MachO/arm64-stubs.s b/lld/test/MachO/arm64-stubs.s
index 6fd94661d32e2..55e0f0613a6ec 100644
--- a/lld/test/MachO/arm64-stubs.s
+++ b/lld/test/MachO/arm64-stubs.s
@@ -1,4 +1,6 @@
 # REQUIRES: aarch64
+
+## Test arm64 stubs
 # RUN: rm -rf %t; split-file %s %t
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/foo.s -o %t/foo.o
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/bar.s -o %t/bar.o
@@ -6,8 +8,16 @@
 # RUN: %lld -arch arm64 -dylib -install_name @executable_path/libfoo.dylib %t/foo.o -o %t/libfoo.dylib
 # RUN: %lld -arch arm64 -dylib -install_name @executable_path/libbar.dylib %t/bar.o -o %t/libbar.dylib
 # RUN: %lld -arch arm64 -lSystem %t/libfoo.dylib %t/libbar.dylib %t/test.o -o %t/test -no_fixup_chains
+# RUN: llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --section="__TEXT,__stubs" --section="__TEXT,__stub_helper" %t/test | FileCheck %s -DREG=x16
 
-# RUN: llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --section="__TEXT,__stubs" --section="__TEXT,__stub_helper" %t/test | FileCheck %s
+## Test arm64_32 stubs
+# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/foo.s -o %t/foo32.o
+# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/bar.s -o %t/bar32.o
+# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/test.s -o %t/test32.o
+# RUN: %lld-watchos -dylib -install_name @executable_path/libfoo.dylib %t/foo32.o -o %t/libfoo32.dylib
+# RUN: %lld-watchos -dylib -install_name @executable_path/libbar.dylib %t/bar32.o -o %t/libbar32.dylib
+# RUN: %lld-watchos -lSystem %t/libfoo32.dylib %t/libbar32.dylib %t/test32.o -o %t/test32 -no_fixup_chains
+# RUN: llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --section="__TEXT,__stubs" --section="__TEXT,__stub_helper" %t/test32 | FileCheck %s -DREG=w16
 
 # CHECK:       _main:
 # CHECK-NEXT:  bl 0x[[#%x,FOO:]] ; symbol stub for: _foo
@@ -16,10 +26,10 @@
 
 # CHECK-LABEL: Contents of (__TEXT,__stubs) section
 # CHECK-NEXT:  [[#BAR]]: adrp x16
-# CHECK-NEXT:            ldr x16, [x16{{.*}}] ; literal pool symbol address: _bar
+# CHECK-NEXT:            ldr [[REG]], [x16{{.*}}] ; literal pool symbol address: _bar
 # CHECK-NEXT:            br x16
 # CHECK-NEXT:  [[#FOO]]: adrp x16
-# CHECK-NEXT:            ldr x16, [x16{{.*}}] ; literal pool symbol address: _foo
+# CHECK-NEXT:            ldr [[REG]], [x16{{.*}}] ; literal pool symbol address: _foo
 # CHECK-NEXT:            br x16
 
 # CHECK-LABEL: Contents of (__TEXT,__stub_helper) section
@@ -27,7 +37,7 @@
 # CHECK-NEXT:                          add x17, x17
 # CHECK-NEXT:                          stp x16, x17, [sp, #-16]!
 # CHECK-NEXT:                          adrp x16
-# CHECK-NEXT:                          ldr x16, [x16] ; literal pool symbol address: dyld_stub_binder
+# CHECK-NEXT:                          ldr [[REG]], [x16] ; literal pool symbol address: dyld_stub_binder
 # CHECK-NEXT:                          br x16
 # CHECK-NEXT:                          ldr w16, 0x[[#%x,BAR_BIND_OFF_ADDR:]]
 # CHECK-NEXT:                          b 0x[[#HELPER_HEADER]]

diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 3a8a0ef46b035..55e03ee42bb79 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -104,14 +104,20 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
         CommentStream << format("0x%llx", (0xfffffffffffff000LL & Address) +
                                               Value * 0x1000);
     } else if (MI.getOpcode() == AArch64::ADDXri ||
+               MI.getOpcode() == AArch64::ADDWri ||
                MI.getOpcode() == AArch64::LDRXui ||
+               MI.getOpcode() == AArch64::LDRWui ||
                MI.getOpcode() == AArch64::LDRXl ||
+               MI.getOpcode() == AArch64::LDRWl ||
                MI.getOpcode() == AArch64::ADR) {
-      if (MI.getOpcode() == AArch64::ADDXri)
+      if (MI.getOpcode() == AArch64::ADDXri ||
+          MI.getOpcode() == AArch64::ADDWri)
         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
-      else if (MI.getOpcode() == AArch64::LDRXui)
+      else if (MI.getOpcode() == AArch64::LDRXui ||
+               MI.getOpcode() == AArch64::LDRWui)
         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
-      if (MI.getOpcode() == AArch64::LDRXl) {
+      if (MI.getOpcode() == AArch64::LDRXl ||
+          MI.getOpcode() == AArch64::LDRWl) {
         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
         SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
                      &ReferenceName);
@@ -123,9 +129,22 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
         const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
         // otool expects the fully encoded ADD/LDR instruction to be passed in
         // as the value here, so reconstruct it:
-        unsigned EncodedInst =
-          MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000;
-        EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
+        unsigned EncodedInst;
+        switch (MI.getOpcode()) {
+        case AArch64::ADDXri:
+          EncodedInst = 0x91000000;
+          break;
+        case AArch64::ADDWri:
+          EncodedInst = 0x11000000;
+          break;
+        case AArch64::LDRXui:
+          EncodedInst = 0xF9400000;
+          break;
+        default: // LDRWui
+          EncodedInst = 0xB9400000;
+          break;
+        }
+        EncodedInst |= Value << 10; // imm12 (ADD: imm+shift, LDR: offset)
         EncodedInst |=
           MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
         EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd

diff --git a/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/symbolized-stubs.exe.macho-arm64_32 b/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/symbolized-stubs.exe.macho-arm64_32
new file mode 100755
index 0000000000000..93a74e6d90bd7
Binary files /dev/null and b/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/symbolized-stubs.exe.macho-arm64_32 differ

diff --git a/llvm/test/tools/llvm-objdump/MachO/AArch64/macho-symbolized-disassembly-arm64_32.test b/llvm/test/tools/llvm-objdump/MachO/AArch64/macho-symbolized-disassembly-arm64_32.test
new file mode 100644
index 0000000000000..b6f5de0e788d1
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/MachO/AArch64/macho-symbolized-disassembly-arm64_32.test
@@ -0,0 +1,23 @@
+# RUN: llvm-objdump -d -m --no-show-raw-insn --section="__TEXT,__stubs" %p/Inputs/symbolized-stubs.exe.macho-arm64_32 | FileCheck %s
+
+## Test that arm64_32 disassembly shows "literal pool symbol address" comments.
+
+# CHECK: Contents of (__TEXT,__stubs) section
+# CHECK-NEXT: adrp x16
+# CHECK-NEXT: ldr w16, [x16] ; literal pool symbol address: _free
+# CHECK-NEXT: br x16
+# CHECK-NEXT: adrp x16
+# CHECK-NEXT: ldr w16, [x16, #0x4] ; literal pool symbol address: _malloc
+# CHECK-NEXT: br x16
+
+## The test binary was generated with:
+##   cat > test.c << 'EOF'
+##   #include <stdlib.h>
+##   int main(void) {
+##       int *p = malloc(4);
+##       free(p);
+##       return 0;
+##   }
+##   EOF
+##   xcrun -sdk watchos clang -arch arm64_32 test.c -o symbolized-stubs.exe.macho-arm64_32
+##   xcrun -sdk watchos strip -x symbolized-stubs.exe.macho-arm64_32

diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index 93b9b0d1e4f9b..44d7c11343f93 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -7013,7 +7013,8 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
     // If this is arm64 and the reference is an adrp instruction save the
     // instruction, passed in ReferenceValue and the address of the instruction
     // for use later if we see and add immediate instruction.
-  } else if (info->O->getArch() == Triple::aarch64 &&
+  } else if ((info->O->getArch() == Triple::aarch64 ||
+              info->O->getArch() == Triple::aarch64_32) &&
              *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
     info->adrp_inst = ReferenceValue;
     info->adrp_addr = ReferencePC;
@@ -7027,7 +7028,8 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
     // this add's Xn register reconstruct the value being referenced and look to
     // see if it is a literal pointer.  Note the add immediate instruction is
     // passed in ReferenceValue.
-  } else if (info->O->getArch() == Triple::aarch64 &&
+  } else if ((info->O->getArch() == Triple::aarch64 ||
+              info->O->getArch() == Triple::aarch64_32) &&
              *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
              ReferencePC - 4 == info->adrp_addr &&
              (info->adrp_inst & 0x9f000000) == 0x90000000 &&
@@ -7057,7 +7059,8 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
     // matches this add's Xn register reconstruct the value being referenced and
     // look to see if it is a literal pointer.  Note the load register
     // instruction is passed in ReferenceValue.
-  } else if (info->O->getArch() == Triple::aarch64 &&
+  } else if ((info->O->getArch() == Triple::aarch64 ||
+              info->O->getArch() == Triple::aarch64_32) &&
              *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_LDRXui &&
              ReferencePC - 4 == info->adrp_addr &&
              (info->adrp_inst & 0x9f000000) == 0x90000000 &&
@@ -7073,8 +7076,10 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
     ldrxui_inst = ReferenceValue;
     ldrxui_imm = (ldrxui_inst >> 10) & 0xfff;
 
+    // The size field (bits [31:30]) determines the scaling.
+    unsigned Scale = (ldrxui_inst >> 30) & 0x3;
     ReferenceValue = (info->adrp_addr & 0xfffffffffffff000LL) +
-                     (adrp_imm << 12) + (ldrxui_imm << 3);
+                     (adrp_imm << 12) + (ldrxui_imm << Scale);
 
     *ReferenceName =
         GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info);
@@ -7083,7 +7088,8 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
   }
   // If this arm64 and is an load register (PC-relative) instruction the
   // ReferenceValue is the PC plus the immediate value.
-  else if (info->O->getArch() == Triple::aarch64 &&
+  else if ((info->O->getArch() == Triple::aarch64 ||
+            info->O->getArch() == Triple::aarch64_32) &&
            (*ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_LDRXl ||
             *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADR)) {
     *ReferenceName =
@@ -7098,8 +7104,7 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
       *ReferenceName = info->demangled_name;
       *ReferenceType = LLVMDisassembler_ReferenceType_DeMangled_Name;
     }
-  }
-  else {
+  } else {
     *ReferenceName = nullptr;
     *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
   }


        


More information about the llvm-commits mailing list