[lld] [lld][LoongArch] GOT indirection to PC relative optimization (PR #123743)
Zhaoxin Yang via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 24 02:50:34 PDT 2025
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123743
>From 5e854ab7e239c318c42cda50e9580ee53ba827b7 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Tue, 14 Jan 2025 15:50:49 +0800
Subject: [PATCH 1/4] [lld][LoongArch] GOT indirection to PC relative
optimization.
In LoongArch, this optimization is only supported when relaxation is enabled.
From:
* pcalau12i $a0, %got_pc_hi20(sym_got)
* ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
To:
* pcalau12i $a0, %pc_hi20(sym)
* addi.w/d $a0, $a0, %pc_lo12(sym)
If the original code sequence can be relaxed into a single instruction
`pcaddi`, this patch will not be taken (see https://).
The implementation related to `got` is split into two locations because
the `relax()` function is part of an iteration fixed-point algorithm. We
should minimize it to achieve better linker performance.
FIXME: Althouth the optimization has been performed, the GOT entries still
exists, similarly to AArch64. Eliminating the entries may be require
additional marking in the common code.
---
lld/ELF/Arch/LoongArch.cpp | 66 +++++++++++++++++++++
lld/test/ELF/loongarch-relax-pc-hi20-lo12.s | 10 ++--
2 files changed, 72 insertions(+), 4 deletions(-)
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 15dcddb13baf7..9d6d2e9d4a66a 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -46,6 +46,8 @@ class LoongArch final : public TargetInfo {
private:
void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
+ bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
+ const Relocation &rLo12, uint64_t secAddr) const;
};
} // end anonymous namespace
@@ -1152,6 +1154,54 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
}
}
+// Try GOT indirection to PC relative optimization when relaxation is enabled.
+// From:
+// * pcalau12i $a0, %got_pc_hi20(sym_got)
+// * ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
+// To:
+// * pcalau12i $a0, %pc_hi20(sym)
+// * addi.w/d $a0, $a0, %pc_lo12(sym)
+//
+// FIXME: Althouth the optimization has been performed, the GOT entries still
+// exists, similarly to AArch64. Eliminating the entries may be require
+// additional marking in the common code.
+bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
+ const Relocation &rLo12, uint64_t secAddr) const {
+ if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||
+ rHi20.sym->isGnuIFunc() ||
+ (ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section))
+ return false;
+
+ Symbol &sym = *rHi20.sym;
+ uint64_t symLocal = sym.getVA(ctx) + rHi20.addend;
+ // Check if the address difference is within +/-2GB range.
+ // For simplicity, the range mentioned here is an approximate estimate and is
+ // not fully equivalent to the entire region that PC-relative addressing can
+ // cover.
+ int64_t pageOffset =
+ getLoongArchPage(symLocal) - getLoongArchPage(secAddr + rHi20.offset);
+ if (!isInt<20>(pageOffset >> 12))
+ return false;
+
+ Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
+ rHi20.addend, &sym};
+ Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
+ &sym};
+
+ const uint32_t currInsn = read32le(loc);
+ const uint32_t nextInsn = read32le(loc + 4);
+ uint64_t pageDelta =
+ getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type);
+ // pcalau12i $a0, %pc_hi20
+ write32le(loc, insn(PCALAU12I, getD5(currInsn), 0, 0));
+ relocate(loc, newRHi20, pageDelta);
+ // addi.w/d $a0, $a0, %pc_lo12
+ write32le(loc + 4, insn(ctx.arg.is64 ? ADDI_D : ADDI_W, getD5(nextInsn),
+ getJ5(nextInsn), 0));
+ relocate(loc + 4, newRLo12, SignExtend64(symLocal, 64));
+ return true;
+}
+
// During TLSDESC GD_TO_IE, the converted code sequence always includes an
// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
// in `getRelocTargetVA`, expr of this instruction should be adjusted to
@@ -1261,6 +1311,22 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
tlsdescToLe(loc, rel, val);
}
continue;
+ case RE_LOONGARCH_GOT_PAGE_PC:
+ // In LoongArch, we try GOT indirection to PC relative optimization only
+ // when relaxation is enabled. This approach avoids determining whether
+ // relocation types are paired and whether the destination register of
+ // pcalau12i is only used by the immediately following instruction.
+ // Moreover, if the original code sequence can be relaxed to a single
+ // instruction `pcaddi`, the first instruction will be removed and it will
+ // not reach here.
+ if (isPairRelaxable(relocs, i) && rel.type == R_LARCH_GOT_PC_HI20 &&
+ relocs[i + 2].type == R_LARCH_GOT_PC_LO12 &&
+ tryGotToPCRel(loc, rel, relocs[i + 2], secAddr)) {
+ i = i + 3; // skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12,
+ // R_LARCH_RELAX
+ continue;
+ }
+ break;
default:
break;
}
diff --git a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
index a417d89e9fa2e..c0bf6b3ba2811 100644
--- a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
+++ b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
@@ -30,24 +30,26 @@
## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
# NORELAX32-NEXT: 10000: pcalau12i $a0, 1024
# NORELAX32-NEXT: addi.w $a0, $a0, 0
+## Not relaxation, convertion to PCRel.
# NORELAX32-NEXT: pcalau12i $a0, 1024
-# NORELAX32-NEXT: ld.w $a0, $a0, 4
+# NORELAX32-NEXT: addi.w $a0, $a0, 0
# NORELAX32-NEXT: pcalau12i $a0, 1024
# NORELAX32-NEXT: addi.w $a0, $a0, 0
# NORELAX32-NEXT: pcalau12i $a0, 1024
-# NORELAX32-NEXT: ld.w $a0, $a0, 4
+# NORELAX32-NEXT: addi.w $a0, $a0, 0
# NORELAX64-LABEL: <_start>:
## offset exceed range of pcaddi
## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
# NORELAX64-NEXT: 10000: pcalau12i $a0, 1024
# NORELAX64-NEXT: addi.d $a0, $a0, 0
+## Not relaxation, convertion to PCRel.
# NORELAX64-NEXT: pcalau12i $a0, 1024
-# NORELAX64-NEXT: ld.d $a0, $a0, 8
+# NORELAX64-NEXT: addi.d $a0, $a0, 0
# NORELAX64-NEXT: pcalau12i $a0, 1024
# NORELAX64-NEXT: addi.d $a0, $a0, 0
# NORELAX64-NEXT: pcalau12i $a0, 1024
-# NORELAX64-NEXT: ld.d $a0, $a0, 8
+# NORELAX64-NEXT: addi.d $a0, $a0, 0
.section .text
.global _start
>From c18b57d47b33c1dd91dee4595cd42cfe51c9ee0c Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Tue, 21 Jan 2025 09:09:23 +0800
Subject: [PATCH 2/4] Add check for register.
---
lld/ELF/Arch/LoongArch.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 9d6d2e9d4a66a..17e3e4ba70eb3 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -1190,6 +1190,10 @@ bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
const uint32_t currInsn = read32le(loc);
const uint32_t nextInsn = read32le(loc + 4);
+ // Check if use the same register.
+ if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn))
+ return false;
+
uint64_t pageDelta =
getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type);
// pcalau12i $a0, %pc_hi20
>From 9b06f463928cc2450bd38cfa99b47659be1e4417 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Wed, 22 Jan 2025 14:02:31 +0800
Subject: [PATCH 3/4] Delete FIXME according to review.
---
lld/ELF/Arch/LoongArch.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 17e3e4ba70eb3..88e40752e72a1 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -1162,9 +1162,9 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
// * pcalau12i $a0, %pc_hi20(sym)
// * addi.w/d $a0, $a0, %pc_lo12(sym)
//
-// FIXME: Althouth the optimization has been performed, the GOT entries still
-// exists, similarly to AArch64. Eliminating the entries may be require
-// additional marking in the common code.
+// Note: Althouth the optimization has been performed, the GOT entries still
+// exists, similarly to AArch64. Eliminating the entries will increase code
+// complexity.
bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
const Relocation &rLo12, uint64_t secAddr) const {
if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||
>From 2fa2d5ce0c5fb21ee07f517f1064179d5104f65b Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Thu, 24 Jul 2025 17:45:53 +0800
Subject: [PATCH 4/4] fix code style
---
lld/ELF/Arch/LoongArch.cpp | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 88e40752e72a1..5f4efef2d943f 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -1183,17 +1183,16 @@ bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
if (!isInt<20>(pageOffset >> 12))
return false;
- Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
- rHi20.addend, &sym};
- Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
- &sym};
-
const uint32_t currInsn = read32le(loc);
const uint32_t nextInsn = read32le(loc + 4);
// Check if use the same register.
if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn))
return false;
+ Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
+ rHi20.addend, &sym};
+ Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
+ &sym};
uint64_t pageDelta =
getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type);
// pcalau12i $a0, %pc_hi20
More information about the llvm-commits
mailing list