[llvm-branch-commits] [lld] release/21.x: [lld][LoongArch] GOT indirection to PC relative optimization (#123743) (PR #151794)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Aug 1 21:22:29 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld
Author: None (llvmbot)
<details>
<summary>Changes</summary>
Backport 283c47b4c5231a1baf528355f7119a73ac168968
Requested by: @<!-- -->brad0
---
Full diff: https://github.com/llvm/llvm-project/pull/151794.diff
3 Files Affected:
- (modified) lld/ELF/Arch/LoongArch.cpp (+117)
- (added) lld/test/ELF/loongarch-pc-hi20-lo12-got.s (+145)
- (modified) lld/test/ELF/loongarch-relax-pc-hi20-lo12.s (+6-4)
``````````diff
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index a14553018fc36..8802c8c2e7f01 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -46,6 +46,8 @@ class LoongArch final : public TargetInfo {
private:
void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
+ bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
+ const Relocation &rLo12, uint64_t secAddr) const;
};
} // end anonymous namespace
@@ -1155,6 +1157,78 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
}
}
+// Try GOT indirection to PC relative optimization.
+// From:
+// * pcalau12i $a0, %got_pc_hi20(sym_got)
+// * ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
+// To:
+// * pcalau12i $a0, %pc_hi20(sym)
+// * addi.w/d $a0, $a0, %pc_lo12(sym)
+//
+// Note: Although the optimization has been performed, the GOT entries still
+// exist, similarly to AArch64. Eliminating the entries will increase code
+// complexity.
+bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
+ const Relocation &rLo12, uint64_t secAddr) const {
+ // Check if the relocations apply to consecutive instructions.
+ if (rHi20.offset + 4 != rLo12.offset)
+ return false;
+
+ // Check if the relocations reference the same symbol and skip undefined,
+ // preemptible and STT_GNU_IFUNC symbols.
+ if (!rHi20.sym || rHi20.sym != rLo12.sym || !rHi20.sym->isDefined() ||
+ rHi20.sym->isPreemptible || rHi20.sym->isGnuIFunc())
+ return false;
+
+ // GOT references to absolute symbols can't be relaxed to use PCALAU12I/ADDI
+ // in position-independent code because these instructions produce a relative
+ // address.
+ if ((ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section))
+ return false;
+
+ // Check if the addends of both relocations are zero.
+ if (rHi20.addend != 0 || rLo12.addend != 0)
+ return false;
+
+ const uint32_t currInsn = read32le(loc);
+ const uint32_t nextInsn = read32le(loc + 4);
+ const uint32_t ldOpcode = ctx.arg.is64 ? LD_D : LD_W;
+ // Check if the first instruction is PCALAU12I and the second instruction is
+ // LD.
+ if ((currInsn & 0xfe000000) != PCALAU12I ||
+ (nextInsn & 0xffc00000) != ldOpcode)
+ return false;
+
+ // Check if both instructions use the same register.
+ if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn))
+ return false;
+
+ Symbol &sym = *rHi20.sym;
+ uint64_t symLocal = sym.getVA(ctx);
+ const int64_t displace = symLocal - getLoongArchPage(secAddr + rHi20.offset);
+ // Check if the symbol address is in
+ // [(PC & ~0xfff) - 2GiB - 0x800, (PC & ~0xfff) + 2GiB - 0x800).
+ const int64_t underflow = -0x80000000LL - 0x800;
+ const int64_t overflow = 0x80000000LL - 0x800;
+ if (!(displace >= underflow && displace < overflow))
+ return false;
+
+ Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
+ rHi20.addend, &sym};
+ Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
+ &sym};
+ uint64_t pageDelta =
+ getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type);
+ // pcalau12i $a0, %pc_hi20
+ write32le(loc, insn(PCALAU12I, getD5(currInsn), 0, 0));
+ relocate(loc, newRHi20, pageDelta);
+ // addi.w/d $a0, $a0, %pc_lo12
+ write32le(loc + 4, insn(ctx.arg.is64 ? ADDI_D : ADDI_W, getD5(nextInsn),
+ getJ5(nextInsn), 0));
+ relocate(loc + 4, newRLo12, SignExtend64(symLocal, 64));
+ return true;
+}
+
// During TLSDESC GD_TO_IE, the converted code sequence always includes an
// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
// in `getRelocTargetVA`, expr of this instruction should be adjusted to
@@ -1172,6 +1246,30 @@ RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const {
return expr;
}
+static bool pairForGotRels(ArrayRef<Relocation> relocs) {
+ // Check if R_LARCH_GOT_PC_HI20 and R_LARCH_GOT_PC_LO12 always appear in
+ // pairs.
+ size_t i = 0;
+ const size_t size = relocs.size();
+ for (; i != size; ++i) {
+ if (relocs[i].type == R_LARCH_GOT_PC_HI20) {
+ if (i + 1 < size && relocs[i + 1].type == R_LARCH_GOT_PC_LO12) {
+ ++i;
+ continue;
+ }
+ if (relaxable(relocs, i) && i + 2 < size &&
+ relocs[i + 2].type == R_LARCH_GOT_PC_LO12) {
+ i += 2;
+ continue;
+ }
+ break;
+ } else if (relocs[i].type == R_LARCH_GOT_PC_LO12) {
+ break;
+ }
+ }
+ return i == size;
+}
+
void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
const unsigned bits = ctx.arg.is64 ? 64 : 32;
uint64_t secAddr = sec.getOutputSection()->addr;
@@ -1181,6 +1279,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
secAddr += ehIn->getParent()->outSecOff;
bool isExtreme = false, isRelax = false;
const MutableArrayRef<Relocation> relocs = sec.relocs();
+ const bool isPairForGotRels = pairForGotRels(relocs);
for (size_t i = 0, size = relocs.size(); i != size; ++i) {
Relocation &rel = relocs[i];
uint8_t *loc = buf + rel.offset;
@@ -1264,6 +1363,24 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
tlsdescToLe(loc, rel, val);
}
continue;
+ case RE_LOONGARCH_GOT_PAGE_PC:
+ // In LoongArch, we try GOT indirection to PC relative optimization in
+ // normal or medium code model, whether or not with R_LARCH_RELAX
+ // relocation. Moreover, if the original code sequence can be relaxed to a
+ // single instruction `pcaddi`, the first instruction will be removed and
+ // it will not reach here.
+ if (isPairForGotRels && rel.type == R_LARCH_GOT_PC_HI20) {
+ bool isRelax = relaxable(relocs, i);
+ const Relocation lo12Rel = isRelax ? relocs[i + 2] : relocs[i + 1];
+ if (lo12Rel.type == R_LARCH_GOT_PC_LO12 &&
+ tryGotToPCRel(loc, rel, lo12Rel, secAddr)) {
+ // isRelax: skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12
+ // !isRelax: skip relocation R_LARCH_GOT_PC_LO12
+ i += isRelax ? 2 : 1;
+ continue;
+ }
+ }
+ break;
default:
break;
}
diff --git a/lld/test/ELF/loongarch-pc-hi20-lo12-got.s b/lld/test/ELF/loongarch-pc-hi20-lo12-got.s
new file mode 100644
index 0000000000000..acd94007d0ffc
--- /dev/null
+++ b/lld/test/ELF/loongarch-pc-hi20-lo12-got.s
@@ -0,0 +1,145 @@
+# REQUIRES: loongarch
+# RUN: rm -rf %t && split-file %s %t && cd %t
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 a.s -o a.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 unpaired.s -o unpaired.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 lone-ldr.s -o lone-ldr.o
+
+# RUN: ld.lld a.o -T within-range.t -o a
+# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s
+
+## This test verifies the encoding when the register $a0 is used.
+# CHECK: pcalau12i $a0, 0
+# CHECK-NEXT: addi.d $a0, $a0, -2048
+
+## PCALAU12I contains a nonzero addend, no relaxations should be applied.
+# CHECK-NEXT: pcalau12i $a1, 2
+# CHECK-NEXT: ld.d $a1, $a1, -2048
+
+## LD contains a nonzero addend, no relaxations should be applied.
+# CHECK-NEXT: pcalau12i $a2, 2
+# CHECK-NEXT: ld.d $a2, $a2, -2040
+
+## PCALAU12I and LD use different registers, no relaxations should be applied.
+# CHECK-NEXT: pcalau12i $a3, 2
+# CHECK-NEXT: ld.d $a4, $a3, -2048
+
+## PCALAU12I and LD use different registers, no relaxations should be applied.
+# CHECK-NEXT: pcalau12i $a5, 2
+# CHECK-NEXT: ld.d $a5, $a6, -2048
+
+# RUN: ld.lld a.o -T underflow-range.t -o a-underflow
+# RUN: llvm-objdump -d --no-show-raw-insn a-underflow | FileCheck --check-prefix=OUTRANGE %s
+
+# RUN: ld.lld a.o -T overflow-range.t -o a-overflow
+# RUN: llvm-objdump -d --no-show-raw-insn a-overflow | FileCheck --check-prefix=OUTRANGE %s
+
+# OUTRANGE: pcalau12i $a0, 1
+# OUTRANGE-NEXT: ld.d $a0, $a0, 0
+
+## Relocations do not appear in pairs, no relaxations should be applied.
+# RUN: ld.lld unpaired.o -T within-range.t -o unpaired
+# RUN: llvm-objdump --no-show-raw-insn -d unpaired | FileCheck --check-prefix=UNPAIRED %s
+
+# UNPAIRED: pcalau12i $a0, 2
+# UNPAIRED-NEXT: b 8
+# UNPAIRED-NEXT: pcalau12i $a0, 2
+# UNPAIRED: ld.d $a0, $a0, -2048
+
+## Relocations do not appear in pairs, no relaxations should be applied.
+# RUN: ld.lld lone-ldr.o -T within-range.t -o lone-ldr
+# RUN: llvm-objdump --no-show-raw-insn -d lone-ldr | FileCheck --check-prefix=LONE-LDR %s
+
+# LONE-LDR: ld.d $a0, $a0, -2048
+
+## 32-bit code is mostly the same. We only test a few variants.
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 a.32.s -o a.32.o
+# RUN: ld.lld a.32.o -T within-range.t -o a32
+# RUN: llvm-objdump -d --no-show-raw-insn a32 | FileCheck --check-prefix=CHECK32 %s
+
+## This test verifies the encoding when the register $a0 is used.
+# CHECK32: pcalau12i $a0, 0
+# CHECK32-NEXT: addi.w $a0, $a0, -2048
+
+
+## This linker script ensures that .rodata and .text are sufficiently close to
+## each other so that the pcalau12i + ld pair can be relaxed to pcalau12i + addi.
+#--- within-range.t
+SECTIONS {
+ .rodata 0x1800: { *(.rodata) }
+ .text 0x2800: { *(.text) }
+ .got 0x3800: { *(.got) }
+}
+
+## This linker script ensures that .rodata and .text are sufficiently far apart
+## so that the pcalau12i + ld pair cannot be relaxed to pcalau12i + addi.
+#--- underflow-range.t
+SECTIONS {
+ .rodata 0x800-4: { *(.rodata) }
+ .got 0x80002000: { *(.got) }
+ .text 0x80001000: { *(.text) } /* (0x800-4)+2GB+0x800+4 */
+}
+
+#--- overflow-range.t
+SECTIONS {
+ .text 0x1000: { *(.text) }
+ .got 0x2000: { *(.got) }
+ .rodata 0x80000800 : { *(.rodata) } /* 0x1000+2GB-0x800 */
+}
+
+#--- a.s
+## Symbol 'x' is nonpreemptible, the optimization should be applied.
+.rodata
+.hidden x
+x:
+.word 10
+
+.text
+.global _start
+_start:
+ pcalau12i $a0, %got_pc_hi20(x)
+ ld.d $a0, $a0, %got_pc_lo12(x)
+ pcalau12i $a1, %got_pc_hi20(x+1)
+ ld.d $a1, $a1, %got_pc_lo12(x)
+ pcalau12i $a2, %got_pc_hi20(x)
+ ld.d $a2, $a2, %got_pc_lo12(x+8)
+ pcalau12i $a3, %got_pc_hi20(x)
+ ld.d $a4, $a3, %got_pc_lo12(x)
+ pcalau12i $a5, %got_pc_hi20(x)
+ ld.d $a5, $a6, %got_pc_lo12(x)
+
+#--- unpaired.s
+.text
+.hidden x
+x:
+ nop
+.global _start
+_start:
+ pcalau12i $a0, %got_pc_hi20(x)
+ b L
+ pcalau12i $a0, %got_pc_hi20(x)
+L:
+ ld.d $a0, $a0, %got_pc_lo12(x)
+
+#--- lone-ldr.s
+.text
+.hidden x
+x:
+ nop
+.global _start
+_start:
+ ld.d $a0, $a0, %got_pc_lo12(x)
+
+
+#--- a.32.s
+## Symbol 'x' is nonpreemptible, the optimization should be applied.
+.rodata
+.hidden x
+x:
+.word 10
+
+.text
+.global _start
+_start:
+ pcalau12i $a0, %got_pc_hi20(x)
+ ld.w $a0, $a0, %got_pc_lo12(x)
diff --git a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
index a33f866506e13..08d5d3e950d84 100644
--- a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
+++ b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
@@ -31,24 +31,26 @@
## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
# NORELAX32-NEXT: 10000: pcalau12i $a0, 1024
# NORELAX32-NEXT: addi.w $a0, $a0, 0
+## Not relaxation, conversion to PCRel.
# NORELAX32-NEXT: pcalau12i $a0, 1024
-# NORELAX32-NEXT: ld.w $a0, $a0, 4
+# NORELAX32-NEXT: addi.w $a0, $a0, 0
# NORELAX32-NEXT: pcalau12i $a0, 1024
# NORELAX32-NEXT: addi.w $a0, $a0, 0
# NORELAX32-NEXT: pcalau12i $a0, 1024
-# NORELAX32-NEXT: ld.w $a0, $a0, 4
+# NORELAX32-NEXT: addi.w $a0, $a0, 0
# NORELAX64-LABEL: <_start>:
## offset exceed range of pcaddi
## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
# NORELAX64-NEXT: 10000: pcalau12i $a0, 1024
# NORELAX64-NEXT: addi.d $a0, $a0, 0
+## Not relaxation, conversion to PCRel.
# NORELAX64-NEXT: pcalau12i $a0, 1024
-# NORELAX64-NEXT: ld.d $a0, $a0, 8
+# NORELAX64-NEXT: addi.d $a0, $a0, 0
# NORELAX64-NEXT: pcalau12i $a0, 1024
# NORELAX64-NEXT: addi.d $a0, $a0, 0
# NORELAX64-NEXT: pcalau12i $a0, 1024
-# NORELAX64-NEXT: ld.d $a0, $a0, 8
+# NORELAX64-NEXT: addi.d $a0, $a0, 0
## GOT references with non-zero addends. No relaxation.
``````````
</details>
https://github.com/llvm/llvm-project/pull/151794
More information about the llvm-branch-commits
mailing list