[lld] [lld][ELF][LoongArch] Support relaxing R_LARCH_CALL36 (PR #127312)

WÁNG Xuěruì via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 15 03:06:26 PST 2025


https://github.com/xen0n updated https://github.com/llvm/llvm-project/pull/127312

>From c2640d56f65200fb038224a6bf91f171f179d339 Mon Sep 17 00:00:00 2001
From: WANG Xuerui <git at xen0n.name>
Date: Tue, 9 Apr 2024 02:16:29 +0800
Subject: [PATCH] [lld][ELF][LoongArch] Support relaxing R_LARCH_CALL36

Relax eligible PCADDU18I + JIRL sequences to B or BL depending on JIRL's
output (link) register. Correctness is maintained on a best-effort basis
by ensuring the underlying instruction pair is PCADDU18I and JIRL, and
that the register operands involved are appropriate.

This is beneficial performance-wise for code compiled with the medium
code model, and enables future changing of the default code model from
"small" to "medium" without runtime performance impact.
---
 lld/ELF/Arch/LoongArch.cpp            |  47 +++++++++
 lld/test/ELF/loongarch-relax-call36.s | 131 ++++++++++++++++++++++++++
 2 files changed, 178 insertions(+)
 create mode 100644 lld/test/ELF/loongarch-relax-call36.s

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index dbf024eadf100..6cfb307886847 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -55,9 +55,12 @@ enum Op {
   ANDI = 0x03400000,
   PCADDI = 0x18000000,
   PCADDU12I = 0x1c000000,
+  PCADDU18I = 0x1e000000,
   LD_W = 0x28800000,
   LD_D = 0x28c00000,
   JIRL = 0x4c000000,
+  B = 0x50000000,
+  BL = 0x54000000,
 };
 
 enum Reg {
@@ -830,6 +833,45 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
   remove = 4;
 }
 
+static bool isInsnPairCall36(uint64_t pair) {
+  const uint32_t insn1 = extractBits(pair, 31, 0);
+  const uint32_t insn2 = extractBits(pair, 63, 32);
+  if ((insn1 & 0xfe000000) != PCADDU18I)
+    return false;
+  if ((insn2 & 0xfc000000) != JIRL)
+    return false;
+
+  const uint32_t rd1 = extractBits(insn1, 4, 0);
+  const uint32_t rd2 = extractBits(insn2, 4, 0);
+  const uint32_t rj2 = extractBits(insn2, 9, 5);
+  if (rd1 != rj2)
+    return false;
+  if (rd2 != R_ZERO && rd2 != R_RA)
+    return false;
+
+  return true;
+}
+
+// Relax R_LARCH_CALL36 pcaddu18i+jirl to b or bl.
+static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
+                      Relocation &r, uint32_t &remove) {
+  const Symbol &sym = *r.sym;
+  const uint64_t insnPair = read64le(sec.content().data() + r.offset);
+  if (!isInsnPairCall36(insnPair))
+    return;
+
+  const bool isTail = extractBits(insnPair, 32 + 4, 32 + 0) == R_ZERO;
+  const uint64_t dest =
+      (r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend;
+  const int64_t displace = dest - loc;
+
+  if (isInt<28>(displace) && !(displace & 0x3)) {
+    sec.relaxAux->relocTypes[i] = R_LARCH_B26;
+    sec.relaxAux->writes.push_back(isTail ? B : BL);
+    remove = 4;
+  }
+}
+
 static bool relax(Ctx &ctx, InputSection &sec) {
   const uint64_t secAddr = sec.getVA();
   const MutableArrayRef<Relocation> relocs = sec.relocs();
@@ -874,6 +916,10 @@ static bool relax(Ctx &ctx, InputSection &sec) {
       if (isPairRelaxable(relocs, i))
         relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
       break;
+    case R_LARCH_CALL36:
+      if (relaxable(relocs, i))
+        relaxCall(ctx, sec, i, loc, r, remove);
+      break;
     }
 
     // For all anchors whose offsets are <= r.offset, they are preceded by
@@ -971,6 +1017,7 @@ void LoongArch::finalizeRelax(int passes) const {
           switch (newType) {
           case R_LARCH_RELAX:
             break;
+          case R_LARCH_B26:
           case R_LARCH_PCREL20_S2:
             skip = 4;
             write32le(p, aux.writes[writesIdx++]);
diff --git a/lld/test/ELF/loongarch-relax-call36.s b/lld/test/ELF/loongarch-relax-call36.s
new file mode 100644
index 0000000000000..1ba9dbd688975
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-call36.s
@@ -0,0 +1,131 @@
+# REQUIRES: loongarch
+## Relax R_LARCH_CALL36.
+## Currently only loongarch64 is covered, because the call36 pseudo-instruction
+## is valid for LA64 only, due to LA32 not having pcaddu18i.
+
+# TODO:
+#
+# * trivial cases
+# * +/- limit: -4, 0, +4
+# * align: 0, 1, 2, 3
+# * invalid pcaddu18i + jirl pairs
+#    - rd1 != rj2
+#    - rd2 not in (0, 1)
+
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o
+
+# RUN: ld.lld -T lds a.o -o a
+# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s
+
+## Unsure whether this needs a diagnostic. GNU ld allows this.
+# RUN: ld.lld -T lds -pie a.o -o a.pie
+# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s
+
+# RUN: ld.lld -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt
+# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s --check-prefix=CHECK2
+
+# CHECK-LABEL:  <_start>:
+# CHECK-NEXT:             bl -4 <near_before>
+# CHECK-NEXT:             b -8 <near_before>
+# CHECK-NEXT:             bl 64 <near_after>
+# CHECK-NEXT:             b 60 <near_after>
+# CHECK-NEXT:             pcaddu18i $ra, -512
+# CHECK-NEXT:             jirl $ra, $ra, -4
+# CHECK-NEXT:             bl -134217728 <far_b>
+# CHECK-NEXT:             bl 134217724 <far_y>
+# CHECK-NEXT:             pcaddu18i $ra, 512
+# CHECK-NEXT:             jirl $ra, $ra, 0
+# CHECK-NEXT:             pcaddu18i $t0, 0
+# CHECK-NEXT:             jirl $t0, $t0, -44
+# CHECK-NEXT:             pcaddu18i $t0, 0
+# CHECK-NEXT:             jirl $zero, $t1, 24
+# CHECK-NEXT:             pcalau12i $t0, 0
+# CHECK-NEXT:             jirl $zero, $t0, -60
+# CHECK-NEXT:             pcaddu18i $t0, 0
+# CHECK-NEXT:             addu16i.d $t0, $t0, 2
+# CHECK-EMPTY:
+
+# CHECK-LABEL:  <.mid>:
+# CHECK-NEXT:             b 2048
+# CHECK-NEXT:             b 2044
+# CHECK-EMPTY:
+
+# CHECK2-LABEL: <.mid>:
+# CHECK2-NEXT:            pcaddu18i $t0, 0
+# CHECK2-NEXT:            jr $t0
+# CHECK2-NEXT:            pcaddu18i $t0, 0
+# CHECK2-NEXT:            jr $t0
+# CHECK2-EMPTY:
+
+#--- a.s
+.global _start, ifunc
+near_before:
+  ret
+
+_start:
+  call36 near_before
+  tail36 $t0, near_before
+
+  call36 near_after
+  tail36 $t0, near_after
+
+  call36 far_a  ## just out of relaxable range: 0x08000010 - 0x10000014 = -(1 << 27) - 4
+  call36 far_b  ## just in relaxable range: 0x0800001c - 0x1000001c = -(1 << 27)
+
+  call36 far_y  ## just in relaxable range: 0x1800001c - 0x10000020 = (1 << 27) - 4
+  call36 far_z  ## just out of relaxable range: 0x18000024 - 0x10000024 = 1 << 27
+
+  ## broken R_LARCH_CALL36 usages should not be relaxed even if relaxable
+  ## otherwise
+  ## correctness is not guaranteed for malformed input like these
+
+  ## jirl link register (rd) not $zero or $ra (hence not expressible by B or BL)
+  ## the apparent correctness here is only coincidence and should not be relied
+  ## upon
+  .reloc ., R_LARCH_CALL36, near_before
+  .reloc ., R_LARCH_RELAX, 0
+  pcaddu18i $t0, 0
+  jirl      $t0, $t0, 0
+
+  ## jirl base != pcaddu18i output
+  .reloc ., R_LARCH_CALL36, near_after
+  .reloc ., R_LARCH_RELAX, 0
+  pcaddu18i $t0, 0
+  jirl      $zero, $t1, 0
+
+  ## 1st insn not pcaddu18i
+  .reloc ., R_LARCH_CALL36, near_before
+  .reloc ., R_LARCH_RELAX, 0
+  pcalau12i $t0, 0
+  jirl      $zero, $t0, 0
+
+  ## 2nd insn not jirl
+  .reloc ., R_LARCH_CALL36, near_after
+  .reloc ., R_LARCH_RELAX, 0
+  pcaddu18i $t0, 0
+  addu16i.d $t0, $t0, 0
+
+near_after:
+  ret
+
+.section .mid,"ax",@progbits
+.balign 16
+  tail36 $t0, ifunc@plt
+  tail36 $t0, ifunc@plt
+
+.type ifunc, @gnu_indirect_function
+ifunc:
+  ret
+
+#--- lds
+SECTIONS {
+  .text 0x10000000 : { *(.text) }
+  .mid  0x10000800 : { *(.mid) }
+  .iplt 0x10001000 : { *(.iplt) }
+}
+
+far_a = 0x08000010;
+far_b = 0x0800001c;
+far_y = 0x1800001c;
+far_z = 0x18000024;



More information about the llvm-commits mailing list