[lld] [llvm] [AArch64][llvm-objdump] Fix arm64_32 symbolization (PR49944) (PR #171164)
Ryan Mansfield via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 09:47:36 PST 2025
https://github.com/rjmansfield created https://github.com/llvm/llvm-project/pull/171164
llvm-objdump was missing "literal pool symbol address" comments for arm64_32 stub disassembly. Fixed by adding 32-bit instruction support (LDRWui, ADDWri, LDRWl) to AArch64ExternalSymbolizer and aarch64_32 architecture checks to MachODump.cpp symbolization code.
Fixes #49288
>From 3682068b90bb5b242e9430a0e13aa032b4c1826d Mon Sep 17 00:00:00 2001
From: Ryan Mansfield <ryan_mansfield at apple.com>
Date: Mon, 8 Dec 2025 11:55:32 -0500
Subject: [PATCH] [AArch64][llvm-objdump] Fix arm64_32 symbolization (PR49944)
llvm-objdump was missing "literal pool symbol address" comments for
arm64_32 stub disassembly. Fixed by adding 32-bit instruction support
(LDRWui, ADDWri, LDRWl) to AArch64ExternalSymbolizer and aarch64_32
architecture checks to MachODump.cpp symbolization code.
Fixes #49288
---
lld/test/MachO/arm64-32-stubs.s | 10 +++-----
.../AArch64ExternalSymbolizer.cpp | 23 ++++++++++++++----
.../symbolized-stubs.exe.macho-arm64_32 | Bin 0 -> 49448 bytes
...macho-symbolized-disassembly-arm64_32.test | 23 ++++++++++++++++++
llvm/tools/llvm-objdump/MachODump.cpp | 16 ++++++++----
5 files changed, 55 insertions(+), 17 deletions(-)
create mode 100755 llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/symbolized-stubs.exe.macho-arm64_32
create mode 100644 llvm/test/tools/llvm-objdump/MachO/AArch64/macho-symbolized-disassembly-arm64_32.test
diff --git a/lld/test/MachO/arm64-32-stubs.s b/lld/test/MachO/arm64-32-stubs.s
index a5d48ff5baef3..c5033d89f1f7d 100644
--- a/lld/test/MachO/arm64-32-stubs.s
+++ b/lld/test/MachO/arm64-32-stubs.s
@@ -1,9 +1,5 @@
# REQUIRES: aarch64
-## FIXME: This test is very similar to arm64-stubs.s, but has been split into a
-## separate file because llvm-objdump doesn't correctly symbolize arm64_32. In
-## particular, the "literal pool symbol address" comments are missing (PR49944).
-
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/foo.s -o %t/foo.o
# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/bar.s -o %t/bar.o
@@ -21,10 +17,10 @@
# CHECK-LABEL: Contents of (__TEXT,__stubs) section
# CHECK-NEXT: [[#BAR]]: adrp x16
-# CHECK-NEXT: ldr w16, [x16{{.*}}]
+# CHECK-NEXT: ldr w16, [x16{{.*}}] ; literal pool symbol address: _bar
# CHECK-NEXT: br x16
# CHECK-NEXT: [[#FOO]]: adrp x16
-# CHECK-NEXT: ldr w16, [x16{{.*}}]
+# CHECK-NEXT: ldr w16, [x16{{.*}}] ; literal pool symbol address: _foo
# CHECK-NEXT: br x16
# CHECK-LABEL: Contents of (__TEXT,__stub_helper) section
@@ -32,7 +28,7 @@
# CHECK-NEXT: add x17, x17
# CHECK-NEXT: stp x16, x17, [sp, #-16]!
# CHECK-NEXT: adrp x16
-# CHECK-NEXT: ldr w16, [x16]
+# CHECK-NEXT: ldr w16, [x16] ; literal pool symbol address: dyld_stub_binder
# CHECK-NEXT: br x16
# CHECK-NEXT: ldr w16, 0x[[#%x,BAR_BIND_OFF_ADDR:]]
# CHECK-NEXT: b 0x[[#HELPER_HEADER]]
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 3a8a0ef46b035..cbac860a7dff5 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -104,14 +104,20 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
CommentStream << format("0x%llx", (0xfffffffffffff000LL & Address) +
Value * 0x1000);
} else if (MI.getOpcode() == AArch64::ADDXri ||
+ MI.getOpcode() == AArch64::ADDWri ||
MI.getOpcode() == AArch64::LDRXui ||
+ MI.getOpcode() == AArch64::LDRWui ||
MI.getOpcode() == AArch64::LDRXl ||
+ MI.getOpcode() == AArch64::LDRWl ||
MI.getOpcode() == AArch64::ADR) {
- if (MI.getOpcode() == AArch64::ADDXri)
+ if (MI.getOpcode() == AArch64::ADDXri ||
+ MI.getOpcode() == AArch64::ADDWri)
ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
- else if (MI.getOpcode() == AArch64::LDRXui)
+ else if (MI.getOpcode() == AArch64::LDRXui ||
+ MI.getOpcode() == AArch64::LDRWui)
ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
- if (MI.getOpcode() == AArch64::LDRXl) {
+ if (MI.getOpcode() == AArch64::LDRXl ||
+ MI.getOpcode() == AArch64::LDRWl) {
ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
&ReferenceName);
@@ -123,8 +129,15 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
// otool expects the fully encoded ADD/LDR instruction to be passed in
// as the value here, so reconstruct it:
- unsigned EncodedInst =
- MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000;
+ unsigned EncodedInst;
+ if (MI.getOpcode() == AArch64::ADDXri)
+ EncodedInst = 0x91000000;
+ else if (MI.getOpcode() == AArch64::ADDWri)
+ EncodedInst = 0x11000000;
+ else if (MI.getOpcode() == AArch64::LDRXui)
+ EncodedInst = 0xF9400000;
+ else // LDRWui
+ EncodedInst = 0xB9400000;
EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
EncodedInst |=
MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
diff --git a/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/symbolized-stubs.exe.macho-arm64_32 b/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/symbolized-stubs.exe.macho-arm64_32
new file mode 100755
index 0000000000000000000000000000000000000000..93a74e6d90bd7e6b89c286a63dd1de41f1988b82
GIT binary patch
literal 49448
zcmeI)&1+Ow7{~EvCS!;snkjXm)|!+SQ7MiuiG(6@V=@$}=n%6AQaDVUs~N}(%uFzn
zE-q;I3NG9f-0ELo=%NFq+d?;8b>Xs$_8%yu?eDpFW-@vC2bAxD=bm%!J@=gFeC}+|
z{QA$||BjgDbEb>pBgMA^X5ZVm`MoJc2$kut%lFE2chu>YsN2|I`+EglLYOZv%=bNy
z)J{5m)rZvd&~j-Bp%rhmIvrK}-cPc8EXs22jV$zL)jQ4B`eL*9Xzv_Tn&Y^hPDQ)t
z80yR+3(c{<`l!BI3-#3pYiWP?J}QaXg6>zM-E-(xueiZ?iV$X}=cmKWo!fUaiMDP=
ziX^Lj?;bwBs0iWVS|7oI?+<C5zwh6xTL#tW_I<AGyWP5Z`<wFY&1_}e&DmLh*tWFx
zrbo9-Xjad`pD(Gp(&;a$e^Gs(-|b$h@89h&)z?%HD=G>v-8p@%@2i`WKigaWXzO>f
z_on(s6{i&bElTUnMrpaeSgJi<uBrct!gFYSe(uEVkJsP)5M62xe_A_pS@*^j*?cF}
zI+^B@{<aFg6wd2Tx;NEc&T4Nyk41jjDW9o%%)3!1RCyk<N!QKxb4%OL?c}EX?mlj|
z;*|?GE-26XqS+aZ_w`=3&HQ_v>-61@f&c;tAb<b@2q1s}0tg_000IagfB*srAb<b@
z2q1s}0tg_000IagfB*srAb<b@2q1s}0tg_000IagfB*srAb<b@2q1s}0tg%=@ZS^r
zZF@8KdV9v6|Mc_N+r*MNop*lmZrHZ|9FDf$4Gg`}Vdob*E<LxsIsIDWpSK6X?>Z0u
zteB6sEI*dD2eY=H|Ms)nUi$0w+;p^ata>W(5&Mo`r)S1Tbhf&fp59J}9CSoHeZU0)
z1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0009IL
zKmY**5I_I{1Q0*~0R#|0009ILKmdUcNZ_f?>?a@4e%J~E2q1s}0tg_000IagfB*sr
zAb<b@2q1s}0tg_000IagfB*srAb<b@2q1s}0tg_000IagfB*srAb<b@2q1s}0tg_0
z00IagfB*vTrNFqdt|%51d4>KDpjQgLmf7*W?P#Fyp96~NdpS*zJF0=<K_iYWtW=km
z*Y2yI3-+<U?+X+xNwi2ltSp6iBfh`hio;S|t;G#Hyi%>N+L3I4hJLBIrnphA9`am9
zb49Z#%VpFPP0UWKQZr5ai@vt9(^6*LT1^WFtf?fm=I(?W)mr1LQfcz)rHRRj%Vz%r
DV=SKs
literal 0
HcmV?d00001
diff --git a/llvm/test/tools/llvm-objdump/MachO/AArch64/macho-symbolized-disassembly-arm64_32.test b/llvm/test/tools/llvm-objdump/MachO/AArch64/macho-symbolized-disassembly-arm64_32.test
new file mode 100644
index 0000000000000..b6f5de0e788d1
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/MachO/AArch64/macho-symbolized-disassembly-arm64_32.test
@@ -0,0 +1,23 @@
+# RUN: llvm-objdump -d -m --no-show-raw-insn --section="__TEXT,__stubs" %p/Inputs/symbolized-stubs.exe.macho-arm64_32 | FileCheck %s
+
+## Test that arm64_32 disassembly shows "literal pool symbol address" comments.
+
+# CHECK: Contents of (__TEXT,__stubs) section
+# CHECK-NEXT: adrp x16
+# CHECK-NEXT: ldr w16, [x16] ; literal pool symbol address: _free
+# CHECK-NEXT: br x16
+# CHECK-NEXT: adrp x16
+# CHECK-NEXT: ldr w16, [x16, #0x4] ; literal pool symbol address: _malloc
+# CHECK-NEXT: br x16
+
+## The test binary was generated with:
+## cat > test.c << 'EOF'
+## #include <stdlib.h>
+## int main(void) {
+## int *p = malloc(4);
+## free(p);
+## return 0;
+## }
+## EOF
+## xcrun -sdk watchos clang -arch arm64_32 test.c -o symbolized-stubs.exe.macho-arm64_32
+## xcrun -sdk watchos strip -x symbolized-stubs.exe.macho-arm64_32
diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index f633ed52943da..1926a3fe4af8f 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -7013,7 +7013,8 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
// If this is arm64 and the reference is an adrp instruction save the
// instruction, passed in ReferenceValue and the address of the instruction
// for use later if we see and add immediate instruction.
- } else if (info->O->getArch() == Triple::aarch64 &&
+ } else if ((info->O->getArch() == Triple::aarch64 ||
+ info->O->getArch() == Triple::aarch64_32) &&
*ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
info->adrp_inst = ReferenceValue;
info->adrp_addr = ReferencePC;
@@ -7027,7 +7028,8 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
// this add's Xn register reconstruct the value being referenced and look to
// see if it is a literal pointer. Note the add immediate instruction is
// passed in ReferenceValue.
- } else if (info->O->getArch() == Triple::aarch64 &&
+ } else if ((info->O->getArch() == Triple::aarch64 ||
+ info->O->getArch() == Triple::aarch64_32) &&
*ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
ReferencePC - 4 == info->adrp_addr &&
(info->adrp_inst & 0x9f000000) == 0x90000000 &&
@@ -7057,7 +7059,8 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
// matches this add's Xn register reconstruct the value being referenced and
// look to see if it is a literal pointer. Note the load register
// instruction is passed in ReferenceValue.
- } else if (info->O->getArch() == Triple::aarch64 &&
+ } else if ((info->O->getArch() == Triple::aarch64 ||
+ info->O->getArch() == Triple::aarch64_32) &&
*ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_LDRXui &&
ReferencePC - 4 == info->adrp_addr &&
(info->adrp_inst & 0x9f000000) == 0x90000000 &&
@@ -7073,8 +7076,10 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
ldrxui_inst = ReferenceValue;
ldrxui_imm = (ldrxui_inst >> 10) & 0xfff;
+ // The size field (bits [31:30]) determines the scaling.
+ unsigned Scale = (ldrxui_inst >> 30) & 0x3;
ReferenceValue = (info->adrp_addr & 0xfffffffffffff000LL) +
- (adrp_imm << 12) + (ldrxui_imm << 3);
+ (adrp_imm << 12) + (ldrxui_imm << Scale);
*ReferenceName =
GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info);
@@ -7083,7 +7088,8 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
}
// If this arm64 and is an load register (PC-relative) instruction the
// ReferenceValue is the PC plus the immediate value.
- else if (info->O->getArch() == Triple::aarch64 &&
+ else if ((info->O->getArch() == Triple::aarch64 ||
+ info->O->getArch() == Triple::aarch64_32) &&
(*ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_LDRXl ||
*ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADR)) {
*ReferenceName =
More information about the llvm-commits
mailing list