[lld] [lld][ELF][LoongArch] Support relaxing R_LARCH_CALL36 (PR #127312)
WÁNG Xuěruì via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 15 03:06:26 PST 2025
https://github.com/xen0n updated https://github.com/llvm/llvm-project/pull/127312
>From c2640d56f65200fb038224a6bf91f171f179d339 Mon Sep 17 00:00:00 2001
From: WANG Xuerui <git at xen0n.name>
Date: Tue, 9 Apr 2024 02:16:29 +0800
Subject: [PATCH] [lld][ELF][LoongArch] Support relaxing R_LARCH_CALL36
Relax eligible PCADDU18I + JIRL sequences to B or BL depending on JIRL's
output (link) register. Correctness is maintained on a best-effort basis
by ensuring the underlying instruction pair is PCADDU18I and JIRL, and
that the register operands involved are appropriate.
This is beneficial performance-wise for code compiled with the medium
code model, and enables future changing of the default code model from
"small" to "medium" without runtime performance impact.
---
lld/ELF/Arch/LoongArch.cpp | 47 +++++++++
lld/test/ELF/loongarch-relax-call36.s | 131 ++++++++++++++++++++++++++
2 files changed, 178 insertions(+)
create mode 100644 lld/test/ELF/loongarch-relax-call36.s
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index dbf024eadf100..6cfb307886847 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -55,9 +55,12 @@ enum Op {
ANDI = 0x03400000,
PCADDI = 0x18000000,
PCADDU12I = 0x1c000000,
+ PCADDU18I = 0x1e000000,
LD_W = 0x28800000,
LD_D = 0x28c00000,
JIRL = 0x4c000000,
+ B = 0x50000000,
+ BL = 0x54000000,
};
enum Reg {
@@ -830,6 +833,45 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
remove = 4;
}
+static bool isInsnPairCall36(uint64_t pair) { // True iff `pair` is a pcaddu18i + jirl sequence that b/bl can replace.
+ const uint32_t insn1 = extractBits(pair, 31, 0); // Low 32 bits: the first instruction of the pair.
+ const uint32_t insn2 = extractBits(pair, 63, 32); // High 32 bits: the second instruction of the pair.
+ if ((insn1 & 0xfe000000) != PCADDU18I) // Compare only the masked opcode bits; operand bits are ignored.
+ return false;
+ if ((insn2 & 0xfc000000) != JIRL) // Same opcode-only comparison for the second instruction.
+ return false;
+
+ const uint32_t rd1 = extractBits(insn1, 4, 0); // Bits 4:0 of insn1 (rd of pcaddu18i).
+ const uint32_t rd2 = extractBits(insn2, 4, 0); // Bits 4:0 of insn2 (rd, i.e. link register, of jirl).
+ const uint32_t rj2 = extractBits(insn2, 9, 5); // Bits 9:5 of insn2 (rj, i.e. base register, of jirl).
+ if (rd1 != rj2) // jirl must jump through the register that pcaddu18i produced.
+ return false;
+ if (rd2 != R_ZERO && rd2 != R_RA) // Any other link register cannot be expressed by b or bl.
+ return false;
+
+ return true;
+}
+
+// Relax R_LARCH_CALL36 pcaddu18i+jirl to b or bl. Records nothing and leaves `remove` untouched when the pair is malformed or the target is out of b/bl range.
+static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
+ Relocation &r, uint32_t &remove) {
+ const Symbol &sym = *r.sym;
+ const uint64_t insnPair = read64le(sec.content().data() + r.offset); // Both instructions, read as one little-endian 64-bit value.
+ if (!isInsnPairCall36(insnPair)) // Best-effort sanity check: never rewrite something that is not a call36 pair.
+ return;
+
+ const bool isTail = extractBits(insnPair, 32 + 4, 32 + 0) == R_ZERO; // jirl's rd (bits 4:0 of the second insn): $zero means no link, i.e. a tail call.
+ const uint64_t dest =
+ (r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend; // Target is the PLT entry for R_PLT_PC, else the symbol itself.
+ const int64_t displace = dest - loc; // Signed displacement from `loc` to the target.
+
+ if (isInt<28>(displace) && !(displace & 0x3)) { // Must fit in a signed 28-bit value and be 4-byte aligned.
+ sec.relaxAux->relocTypes[i] = R_LARCH_B26; // Relocation type to use for the relaxed instruction.
+ sec.relaxAux->writes.push_back(isTail ? B : BL); // Replacement opcode: b for tail calls, bl otherwise.
+ remove = 4; // NOTE(review): presumably the number of bytes this relaxation deletes (one instruction) - confirm against relax().
+ }
+}
+
static bool relax(Ctx &ctx, InputSection &sec) {
const uint64_t secAddr = sec.getVA();
const MutableArrayRef<Relocation> relocs = sec.relocs();
@@ -874,6 +916,10 @@ static bool relax(Ctx &ctx, InputSection &sec) {
if (isPairRelaxable(relocs, i))
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
break;
+ case R_LARCH_CALL36:
+ if (relaxable(relocs, i))
+ relaxCall(ctx, sec, i, loc, r, remove);
+ break;
}
// For all anchors whose offsets are <= r.offset, they are preceded by
@@ -971,6 +1017,7 @@ void LoongArch::finalizeRelax(int passes) const {
switch (newType) {
case R_LARCH_RELAX:
break;
+ case R_LARCH_B26:
case R_LARCH_PCREL20_S2:
skip = 4;
write32le(p, aux.writes[writesIdx++]);
diff --git a/lld/test/ELF/loongarch-relax-call36.s b/lld/test/ELF/loongarch-relax-call36.s
new file mode 100644
index 0000000000000..1ba9dbd688975
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-call36.s
@@ -0,0 +1,131 @@
+# REQUIRES: loongarch
+## Relax R_LARCH_CALL36.
+## Currently only loongarch64 is covered, because the call36 pseudo-instruction
+## is valid for LA64 only, due to LA32 not having pcaddu18i.
+
+# TODO:
+#
+# * trivial cases
+# * +/- limit: -4, 0, +4
+# * align: 0, 1, 2, 3
+# * invalid pcaddu18i + jirl pairs
+# - rd1 != rj2
+# - rd2 not in (0, 1)
+
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o
+
+# RUN: ld.lld -T lds a.o -o a
+# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s
+
+## Unsure whether this needs a diagnostic. GNU ld allows this.
+# RUN: ld.lld -T lds -pie a.o -o a.pie
+# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s
+
+# RUN: ld.lld -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt
+# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s --check-prefix=CHECK2
+
+# CHECK-LABEL: <_start>:
+# CHECK-NEXT: bl -4 <near_before>
+# CHECK-NEXT: b -8 <near_before>
+# CHECK-NEXT: bl 64 <near_after>
+# CHECK-NEXT: b 60 <near_after>
+# CHECK-NEXT: pcaddu18i $ra, -512
+# CHECK-NEXT: jirl $ra, $ra, -4
+# CHECK-NEXT: bl -134217728 <far_b>
+# CHECK-NEXT: bl 134217724 <far_y>
+# CHECK-NEXT: pcaddu18i $ra, 512
+# CHECK-NEXT: jirl $ra, $ra, 0
+# CHECK-NEXT: pcaddu18i $t0, 0
+# CHECK-NEXT: jirl $t0, $t0, -44
+# CHECK-NEXT: pcaddu18i $t0, 0
+# CHECK-NEXT: jirl $zero, $t1, 24
+# CHECK-NEXT: pcalau12i $t0, 0
+# CHECK-NEXT: jirl $zero, $t0, -60
+# CHECK-NEXT: pcaddu18i $t0, 0
+# CHECK-NEXT: addu16i.d $t0, $t0, 2
+# CHECK-EMPTY:
+
+# CHECK-LABEL: <.mid>:
+# CHECK-NEXT: b 2048
+# CHECK-NEXT: b 2044
+# CHECK-EMPTY:
+
+# CHECK2-LABEL: <.mid>:
+# CHECK2-NEXT: pcaddu18i $t0, 0
+# CHECK2-NEXT: jr $t0
+# CHECK2-NEXT: pcaddu18i $t0, 0
+# CHECK2-NEXT: jr $t0
+# CHECK2-EMPTY:
+
+#--- a.s
+.global _start, ifunc
+near_before:
+ ret
+
+_start:
+ call36 near_before
+ tail36 $t0, near_before
+
+ call36 near_after
+ tail36 $t0, near_after
+
+ call36 far_a ## just out of relaxable range: 0x08000010 - 0x10000014 = -(1 << 27) - 4
+ call36 far_b ## just in relaxable range: 0x0800001c - 0x1000001c = -(1 << 27)
+
+ call36 far_y ## just in relaxable range: 0x1800001c - 0x10000020 = (1 << 27) - 4
+ call36 far_z ## just out of relaxable range: 0x18000024 - 0x10000024 = 1 << 27
+
+ ## broken R_LARCH_CALL36 usages should not be relaxed even if relaxable
+ ## otherwise
+ ## correctness is not guaranteed for malformed input like these
+
+ ## jirl link register (rd) not $zero or $ra (hence not expressible by B or BL)
+ ## the apparent correctness here is only coincidence and should not be relied
+ ## upon
+ .reloc ., R_LARCH_CALL36, near_before
+ .reloc ., R_LARCH_RELAX, 0
+ pcaddu18i $t0, 0
+ jirl $t0, $t0, 0
+
+ ## jirl base != pcaddu18i output
+ .reloc ., R_LARCH_CALL36, near_after
+ .reloc ., R_LARCH_RELAX, 0
+ pcaddu18i $t0, 0
+ jirl $zero, $t1, 0
+
+ ## 1st insn not pcaddu18i
+ .reloc ., R_LARCH_CALL36, near_before
+ .reloc ., R_LARCH_RELAX, 0
+ pcalau12i $t0, 0
+ jirl $zero, $t0, 0
+
+ ## 2nd insn not jirl
+ .reloc ., R_LARCH_CALL36, near_after
+ .reloc ., R_LARCH_RELAX, 0
+ pcaddu18i $t0, 0
+ addu16i.d $t0, $t0, 0
+
+near_after:
+ ret
+
+.section .mid,"ax",@progbits
+.balign 16
+ tail36 $t0, ifunc@plt
+ tail36 $t0, ifunc@plt
+
+.type ifunc, @gnu_indirect_function
+ifunc:
+ ret
+
+#--- lds
+SECTIONS {
+ .text 0x10000000 : { *(.text) }
+ .mid 0x10000800 : { *(.mid) }
+ .iplt 0x10001000 : { *(.iplt) }
+}
+
+far_a = 0x08000010;
+far_b = 0x0800001c;
+far_y = 0x1800001c;
+far_z = 0x18000024;
More information about the llvm-commits
mailing list