[lld] [lld][LoongArch] GOT indirection to PC relative optimization (PR #123743)

Zhaoxin Yang via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 29 00:46:40 PDT 2025


https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123743

>From 5e854ab7e239c318c42cda50e9580ee53ba827b7 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Tue, 14 Jan 2025 15:50:49 +0800
Subject: [PATCH 1/6] [lld][LoongArch] GOT indirection to PC relative
 optimization.

In LoongArch, this optimization is only supported when relaxation is enabled.
From:
 * pcalau12i $a0, %got_pc_hi20(sym_got)
 * ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
To:
 * pcalau12i $a0, %pc_hi20(sym)
 * addi.w/d $a0, $a0, %pc_lo12(sym)

If the original code sequence can be relaxed into a single `pcaddi`
instruction, this optimization is not applied (see https://).
The implementation related to `got` is split into two locations because
the `relax()` function is part of an iterative fixed-point algorithm. We
should keep the work done there minimal to achieve better linker performance.

FIXME: Although the optimization has been performed, the GOT entries still
exist, similarly to AArch64. Eliminating the entries may require
additional marking in the common code.
---
 lld/ELF/Arch/LoongArch.cpp                  | 66 +++++++++++++++++++++
 lld/test/ELF/loongarch-relax-pc-hi20-lo12.s | 10 ++--
 2 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 15dcddb13baf7..9d6d2e9d4a66a 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -46,6 +46,8 @@ class LoongArch final : public TargetInfo {
 private:
   void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
   void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
+  bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
+                     const Relocation &rLo12, uint64_t secAddr) const;
 };
 } // end anonymous namespace
 
@@ -1152,6 +1154,54 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
   }
 }
 
+// Try GOT indirection to PC relative optimization when relaxation is enabled.
+// From:
+//  * pcalau12i $a0, %got_pc_hi20(sym_got)
+//  * ld.w/d    $a0, $a0, %got_pc_lo12(sym_got)
+// To:
+//  * pcalau12i $a0, %pc_hi20(sym)
+//  * addi.w/d  $a0, $a0, %pc_lo12(sym)
+//
+// FIXME: Althouth the optimization has been performed, the GOT entries still
+// exists, similarly to AArch64. Eliminating the entries may be require
+// additional marking in the common code.
+bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
+                              const Relocation &rLo12, uint64_t secAddr) const {
+  if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||
+      rHi20.sym->isGnuIFunc() ||
+      (ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section))
+    return false;
+
+  Symbol &sym = *rHi20.sym;
+  uint64_t symLocal = sym.getVA(ctx) + rHi20.addend;
+  // Check if the address difference is within +/-2GB range.
+  // For simplicity, the range mentioned here is an approximate estimate and is
+  // not fully equivalent to the entire region that PC-relative addressing can
+  // cover.
+  int64_t pageOffset =
+      getLoongArchPage(symLocal) - getLoongArchPage(secAddr + rHi20.offset);
+  if (!isInt<20>(pageOffset >> 12))
+    return false;
+
+  Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
+                         rHi20.addend, &sym};
+  Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
+                         &sym};
+
+  const uint32_t currInsn = read32le(loc);
+  const uint32_t nextInsn = read32le(loc + 4);
+  uint64_t pageDelta =
+      getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type);
+  // pcalau12i $a0, %pc_hi20
+  write32le(loc, insn(PCALAU12I, getD5(currInsn), 0, 0));
+  relocate(loc, newRHi20, pageDelta);
+  // addi.w/d $a0, $a0, %pc_lo12
+  write32le(loc + 4, insn(ctx.arg.is64 ? ADDI_D : ADDI_W, getD5(nextInsn),
+                          getJ5(nextInsn), 0));
+  relocate(loc + 4, newRLo12, SignExtend64(symLocal, 64));
+  return true;
+}
+
 // During TLSDESC GD_TO_IE, the converted code sequence always includes an
 // instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
 // in `getRelocTargetVA`, expr of this instruction should be adjusted to
@@ -1261,6 +1311,22 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
         tlsdescToLe(loc, rel, val);
       }
       continue;
+    case RE_LOONGARCH_GOT_PAGE_PC:
+      // In LoongArch, we try GOT indirection to PC relative optimization only
+      // when relaxation is enabled. This approach avoids determining whether
+      // relocation types are paired and whether the destination register of
+      // pcalau12i is only used by the immediately following instruction.
+      // Moreover, if the original code sequence can be relaxed to a single
+      // instruction `pcaddi`, the first instruction will be removed and it will
+      // not reach here.
+      if (isPairRelaxable(relocs, i) && rel.type == R_LARCH_GOT_PC_HI20 &&
+          relocs[i + 2].type == R_LARCH_GOT_PC_LO12 &&
+          tryGotToPCRel(loc, rel, relocs[i + 2], secAddr)) {
+        i = i + 3; // skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12,
+                   // R_LARCH_RELAX
+        continue;
+      }
+      break;
     default:
       break;
     }
diff --git a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
index a417d89e9fa2e..c0bf6b3ba2811 100644
--- a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
+++ b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
@@ -30,24 +30,26 @@
 ## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
 # NORELAX32-NEXT:  10000:  pcalau12i     $a0, 1024
 # NORELAX32-NEXT:          addi.w        $a0, $a0, 0
+## Not relaxation, convertion to PCRel.
 # NORELAX32-NEXT:          pcalau12i     $a0, 1024
-# NORELAX32-NEXT:          ld.w          $a0, $a0, 4
+# NORELAX32-NEXT:          addi.w        $a0, $a0, 0
 # NORELAX32-NEXT:          pcalau12i     $a0, 1024
 # NORELAX32-NEXT:          addi.w        $a0, $a0, 0
 # NORELAX32-NEXT:          pcalau12i     $a0, 1024
-# NORELAX32-NEXT:          ld.w          $a0, $a0, 4
+# NORELAX32-NEXT:          addi.w        $a0, $a0, 0
 
 # NORELAX64-LABEL: <_start>:
 ## offset exceed range of pcaddi
 ## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
 # NORELAX64-NEXT:  10000:  pcalau12i     $a0, 1024
 # NORELAX64-NEXT:          addi.d        $a0, $a0, 0
+## Not relaxation, convertion to PCRel.
 # NORELAX64-NEXT:          pcalau12i     $a0, 1024
-# NORELAX64-NEXT:          ld.d          $a0, $a0, 8
+# NORELAX64-NEXT:          addi.d        $a0, $a0, 0
 # NORELAX64-NEXT:          pcalau12i     $a0, 1024
 # NORELAX64-NEXT:          addi.d        $a0, $a0, 0
 # NORELAX64-NEXT:          pcalau12i     $a0, 1024
-# NORELAX64-NEXT:          ld.d          $a0, $a0, 8
+# NORELAX64-NEXT:          addi.d        $a0, $a0, 0
 
 .section .text
 .global _start

>From c18b57d47b33c1dd91dee4595cd42cfe51c9ee0c Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Tue, 21 Jan 2025 09:09:23 +0800
Subject: [PATCH 2/6] Add check for register.

---
 lld/ELF/Arch/LoongArch.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 9d6d2e9d4a66a..17e3e4ba70eb3 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -1190,6 +1190,10 @@ bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
 
   const uint32_t currInsn = read32le(loc);
   const uint32_t nextInsn = read32le(loc + 4);
+  // Check if use the same register.
+  if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn))
+    return false;
+
   uint64_t pageDelta =
       getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type);
   // pcalau12i $a0, %pc_hi20

>From 9b06f463928cc2450bd38cfa99b47659be1e4417 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Wed, 22 Jan 2025 14:02:31 +0800
Subject: [PATCH 3/6] Delete FIXME according to review.

---
 lld/ELF/Arch/LoongArch.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 17e3e4ba70eb3..88e40752e72a1 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -1162,9 +1162,9 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
 //  * pcalau12i $a0, %pc_hi20(sym)
 //  * addi.w/d  $a0, $a0, %pc_lo12(sym)
 //
-// FIXME: Althouth the optimization has been performed, the GOT entries still
-// exists, similarly to AArch64. Eliminating the entries may be require
-// additional marking in the common code.
+// Note: Althouth the optimization has been performed, the GOT entries still
+// exists, similarly to AArch64. Eliminating the entries will increase code
+// complexity.
 bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
                               const Relocation &rLo12, uint64_t secAddr) const {
   if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||

>From 2fa2d5ce0c5fb21ee07f517f1064179d5104f65b Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Thu, 24 Jul 2025 17:45:53 +0800
Subject: [PATCH 4/6] fix code style

---
 lld/ELF/Arch/LoongArch.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 88e40752e72a1..5f4efef2d943f 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -1183,17 +1183,16 @@ bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
   if (!isInt<20>(pageOffset >> 12))
     return false;
 
-  Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
-                         rHi20.addend, &sym};
-  Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
-                         &sym};
-
   const uint32_t currInsn = read32le(loc);
   const uint32_t nextInsn = read32le(loc + 4);
   // Check if use the same register.
   if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn))
     return false;
 
+  Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
+                         rHi20.addend, &sym};
+  Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
+                         &sym};
   uint64_t pageDelta =
       getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type);
   // pcalau12i $a0, %pc_hi20

>From 0ff13d8f96d60e3b8f9480b633b4d59f877a3c0f Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Sat, 26 Jul 2025 17:30:24 +0800
Subject: [PATCH 5/6] split got optimization and relax and fix the range of
 tryGotToPCRel

---
 lld/ELF/Arch/LoongArch.cpp                | 100 +++++++++++----
 lld/test/ELF/loongarch-pc-hi20-lo12-got.s | 148 ++++++++++++++++++++++
 2 files changed, 222 insertions(+), 26 deletions(-)
 create mode 100644 lld/test/ELF/loongarch-pc-hi20-lo12-got.s

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 5f4efef2d943f..144f6e9b761b4 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -1154,7 +1154,7 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
   }
 }
 
-// Try GOT indirection to PC relative optimization when relaxation is enabled.
+// Try GOT indirection to PC relative optimization.
 // From:
 //  * pcalau12i $a0, %got_pc_hi20(sym_got)
 //  * ld.w/d    $a0, $a0, %got_pc_lo12(sym_got)
@@ -1167,28 +1167,49 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
 // complexity.
 bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
                               const Relocation &rLo12, uint64_t secAddr) const {
-  if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||
-      rHi20.sym->isGnuIFunc() ||
-      (ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section))
+  // Check if the relocations apply to consecutive instructions.
+  if (rHi20.offset + 4 != rLo12.offset)
     return false;
 
-  Symbol &sym = *rHi20.sym;
-  uint64_t symLocal = sym.getVA(ctx) + rHi20.addend;
-  // Check if the address difference is within +/-2GB range.
-  // For simplicity, the range mentioned here is an approximate estimate and is
-  // not fully equivalent to the entire region that PC-relative addressing can
-  // cover.
-  int64_t pageOffset =
-      getLoongArchPage(symLocal) - getLoongArchPage(secAddr + rHi20.offset);
-  if (!isInt<20>(pageOffset >> 12))
+  // Check if the relocations reference the same symbol and skip undefined,
+  // preemptible and STT_GNU_IFUNC symbols.
+  if (!rHi20.sym || rHi20.sym != rLo12.sym || !rHi20.sym->isDefined() ||
+      rHi20.sym->isPreemptible || rHi20.sym->isGnuIFunc())
+    return false;
+
+  // GOT references to absolute symbols can't be relaxed to use PCALAU12I/ADDI
+  // in position-independent code because these instructions produce a relative
+  // address.
+  if ((ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section))
+    return false;
+
+  // Check if the addends of the both relocations are zero.
+  if (rHi20.addend != 0 || rLo12.addend != 0)
     return false;
 
   const uint32_t currInsn = read32le(loc);
   const uint32_t nextInsn = read32le(loc + 4);
+  const uint32_t ldOpcode = ctx.arg.is64 ? LD_D : LD_W;
+  // Check if the first instruction is PCALAU12I and the second instruction is
+  // LD.
+  if ((currInsn & 0xfe000000) != PCALAU12I ||
+      (nextInsn & 0xffc00000) != ldOpcode)
+    return false;
+
   // Check if use the same register.
   if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn))
     return false;
 
+  Symbol &sym = *rHi20.sym;
+  uint64_t symLocal = sym.getVA(ctx);
+  const int64_t displace = symLocal - getLoongArchPage(secAddr + rHi20.offset);
+  // Check if the symbol address is in
+  // [(PC & ~0xfff) - 2GiB - 0x800, (PC & ~0xfff) + 2GiB - 0x800).
+  const int64_t underflow = -0x80000000LL - 0x800;
+  const int64_t overflow = 0x80000000LL - 0x800;
+  if (!(displace >= underflow && displace < overflow))
+    return false;
+
   Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
                          rHi20.addend, &sym};
   Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
@@ -1222,6 +1243,30 @@ RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const {
   return expr;
 }
 
+static bool pairForGotRels(ArrayRef<Relocation> relocs) {
+  // Check if R_LARCH_GOT_PC_HI20 and R_LARCH_GOT_PC_LO12 always appear in
+  // pairs.
+  size_t i = 0;
+  const size_t size = relocs.size();
+  for (; i != size; ++i) {
+    if (relocs[i].type == R_LARCH_GOT_PC_HI20) {
+      if (i + 1 < size && relocs[i + 1].type == R_LARCH_GOT_PC_LO12) {
+        ++i;
+        continue;
+      }
+      if (relaxable(relocs, i) && i + 2 < size &&
+          relocs[i + 2].type == R_LARCH_GOT_PC_LO12) {
+        i += 2;
+        continue;
+      }
+      break;
+    } else if (relocs[i].type == R_LARCH_GOT_PC_LO12) {
+      break;
+    }
+  }
+  return i == size;
+}
+
 void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
   const unsigned bits = ctx.arg.is64 ? 64 : 32;
   uint64_t secAddr = sec.getOutputSection()->addr;
@@ -1231,6 +1276,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
     secAddr += ehIn->getParent()->outSecOff;
   bool isExtreme = false, isRelax = false;
   const MutableArrayRef<Relocation> relocs = sec.relocs();
+  const bool isPairForGotRels = pairForGotRels(relocs);
   for (size_t i = 0, size = relocs.size(); i != size; ++i) {
     Relocation &rel = relocs[i];
     uint8_t *loc = buf + rel.offset;
@@ -1315,19 +1361,21 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
       }
       continue;
     case RE_LOONGARCH_GOT_PAGE_PC:
-      // In LoongArch, we try GOT indirection to PC relative optimization only
-      // when relaxation is enabled. This approach avoids determining whether
-      // relocation types are paired and whether the destination register of
-      // pcalau12i is only used by the immediately following instruction.
-      // Moreover, if the original code sequence can be relaxed to a single
-      // instruction `pcaddi`, the first instruction will be removed and it will
-      // not reach here.
-      if (isPairRelaxable(relocs, i) && rel.type == R_LARCH_GOT_PC_HI20 &&
-          relocs[i + 2].type == R_LARCH_GOT_PC_LO12 &&
-          tryGotToPCRel(loc, rel, relocs[i + 2], secAddr)) {
-        i = i + 3; // skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12,
-                   // R_LARCH_RELAX
-        continue;
+      // In LoongArch, we try GOT indirection to PC relative optimization in
+      // normal or medium code model, whether or not with R_LARCH_RELAX
+      // relocation. Moreover, if the original code sequence can be relaxed to a
+      // single instruction `pcaddi`, the first instruction will be removed and
+      // it will not reach here.
+      if (isPairForGotRels && rel.type == R_LARCH_GOT_PC_HI20) {
+        bool isRelax = relaxable(relocs, i);
+        const Relocation lo12Rel = isRelax ? relocs[i + 2] : relocs[i + 1];
+        if (lo12Rel.type == R_LARCH_GOT_PC_LO12 &&
+            tryGotToPCRel(loc, rel, lo12Rel, secAddr)) {
+          // isRelax: skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12
+          // !isRelax: skip relocation R_LARCH_GOT_PC_LO12
+          i += isRelax ? 2 : 1;
+          continue;
+        }
       }
       break;
     default:
diff --git a/lld/test/ELF/loongarch-pc-hi20-lo12-got.s b/lld/test/ELF/loongarch-pc-hi20-lo12-got.s
new file mode 100644
index 0000000000000..4a7f4fc4908b8
--- /dev/null
+++ b/lld/test/ELF/loongarch-pc-hi20-lo12-got.s
@@ -0,0 +1,148 @@
+# REQUIRES: loongarch
+# RUN: rm -rf %t && split-file %s %t && cd %t
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 a.s -o a.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 unpaired.s -o unpaired.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 lone-ldr.s -o lone-ldr.o
+
+# RUN: ld.lld a.o -T within-range.t -o a
+# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s
+
+## This test verifies the encoding when the register $a0 is used.
+# CHECK:      pcalau12i $a0, 0
+# CHECK-NEXT: addi.d    $a0, $a0, -2048
+
+## PCALAU12I contains a nonzero addend, no relaxations should be applied.
+# CHECK-NEXT: pcalau12i $a1, 2
+# CHECK-NEXT: ld.d      $a1, $a1, -2048
+
+## LD contains a nonzero addend, no relaxations should be applied.
+# CHECK-NEXT: pcalau12i $a2, 2
+# CHECK-NEXT: ld.d      $a2, $a2, -2040
+
+## PCALAU12I and LD use different registers, no relaxations should be applied.
+# CHECK-NEXT: pcalau12i $a3, 2
+# CHECK-NEXT: ld.d      $a4, $a3, -2048
+
+## PCALAU12I and LD use different registers, no relaxations should be applied.
+# CHECK-NEXT: pcalau12i $a5, 2
+# CHECK-NEXT: ld.d      $a5, $a6, -2048
+
+# RUN: ld.lld a.o -T underflow-range.t -o a
+# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck --check-prefix=OUTRANGE %s
+
+# RUN: ld.lld a.o -T overflow-range.t -o a
+# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck --check-prefix=OUTRANGE %s
+
+# OUTRANGE:      pcalau12i $a0, 1
+# OUTRANGE-NEXT: ld.d      $a0, $a0, 0
+
+## Relocations do not appear in pairs, no relaxations should be applied.
+# RUN: ld.lld unpaired.o -T within-range.t  -o unpaired
+# RUN: llvm-objdump --no-show-raw-insn -d unpaired | FileCheck --check-prefix=UNPAIRED %s
+
+# UNPAIRED:         pcalau12i $a0, 2
+# UNPAIRED-NEXT:    b         8
+# UNPAIRED-NEXT:    pcalau12i $a0, 2
+# UNPAIRED:    ld.d      $a0, $a0, -2048
+
+## Relocations do not appear in pairs, no relaxations should be applied.
+# RUN: ld.lld lone-ldr.o -T within-range.t -o lone-ldr
+# RUN: llvm-objdump --no-show-raw-insn -d lone-ldr | FileCheck --check-prefix=LONE-LDR %s
+
+# LONE-LDR:         ld.d   $a0, $a0, -2048
+
+## 32-bit code is mostly the same. We only test a few variants.
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 a.32.s -o a.32.o
+# RUN: ld.lld a.32.o -T within-range.t -o a32
+# RUN: llvm-objdump -d --no-show-raw-insn a32 | FileCheck --check-prefix=CHECK32 %s
+
+## This test verifies the encoding when the register $a0 is used.
+# CHECK32:      pcalau12i $a0, 0
+# CHECK32-NEXT: addi.w    $a0, $a0, -2048
+
+
+## This linker script ensures that .rodata and .text are sufficiently close to
+## each other so that the pcalau12i + ld pair can be relaxed to pcalau12i + add.
+#--- within-range.t
+SECTIONS {
+ .rodata 0x1800: { *(.rodata) }
+ .text   0x2800: { *(.text) }
+ .got    0x3800: { *(.got) }
+}
+
+## This linker script ensures that .rodata and .text are sufficiently far apart
+## so that the pcalau12i + ld pair cannot be relaxed to pcalau12i + add.
+#--- underflow-range.t
+SECTIONS {
+ .rodata 0x800-4: { *(.rodata) }
+ .got    0x80002000: { *(.got) }
+ .text   0x80001000: { *(.text) }  /* (0x800-4)+2GB+0x800+4 */
+}
+
+#--- overflow-range.t
+SECTIONS {
+ .text   0x1000: { *(.text) }
+ .got    0x2000: { *(.got) }
+ .rodate 0x80000800 : { *(.rodata) }  /* 0x1000+2GB-0x800 */
+}
+
+## This linker script ensures that .rodata and .text are sufficiently (>4GB)
+## far apart so that the pcalau12i + ld pair cannot be relaxed to pcalau12i + add.
+
+#--- a.s
+## Symbol 'x' is nonpreemptible, the optimization should be applied.
+.rodata
+.hidden x
+x:
+.word 10
+
+.text
+.global _start
+_start:
+  pcalau12i $a0, %got_pc_hi20(x)
+  ld.d      $a0, $a0, %got_pc_lo12(x)
+  pcalau12i $a1, %got_pc_hi20(x+1)
+  ld.d      $a1, $a1, %got_pc_lo12(x)
+  pcalau12i $a2, %got_pc_hi20(x)
+  ld.d      $a2, $a2, %got_pc_lo12(x+8)
+  pcalau12i $a3, %got_pc_hi20(x)
+  ld.d      $a4, $a3, %got_pc_lo12(x)
+  pcalau12i $a5, %got_pc_hi20(x)
+  ld.d      $a5, $a6, %got_pc_lo12(x)
+
+#--- unpaired.s
+.text
+.hidden x
+x:
+  nop
+.global _start
+_start:
+  pcalau12i $a0, %got_pc_hi20(x)
+  b L
+  pcalau12i $a0, %got_pc_hi20(x)
+L:
+  ld.d      $a0, $a0, %got_pc_lo12(x)
+
+#--- lone-ldr.s
+.text
+.hidden x
+x:
+  nop
+.global _start
+_start:
+  ld.d     $a0, $a0, %got_pc_lo12(x)
+
+
+#--- a.32.s
+## Symbol 'x' is nonpreemptible, the optimization should be applied.
+.rodata
+.hidden x
+x:
+.word 10
+
+.text
+.global _start
+_start:
+  pcalau12i $a0, %got_pc_hi20(x)
+  ld.w      $a0, $a0, %got_pc_lo12(x)

>From 3cb21ed1dc0bc2c5f2eb982a03430ff3ac894325 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Tue, 29 Jul 2025 15:42:09 +0800
Subject: [PATCH 6/6] some fixes according to SixWeining's review.

---
 lld/test/ELF/loongarch-pc-hi20-lo12-got.s | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/lld/test/ELF/loongarch-pc-hi20-lo12-got.s b/lld/test/ELF/loongarch-pc-hi20-lo12-got.s
index 4a7f4fc4908b8..acd94007d0ffc 100644
--- a/lld/test/ELF/loongarch-pc-hi20-lo12-got.s
+++ b/lld/test/ELF/loongarch-pc-hi20-lo12-got.s
@@ -28,11 +28,11 @@
 # CHECK-NEXT: pcalau12i $a5, 2
 # CHECK-NEXT: ld.d      $a5, $a6, -2048
 
-# RUN: ld.lld a.o -T underflow-range.t -o a
-# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck --check-prefix=OUTRANGE %s
+# RUN: ld.lld a.o -T underflow-range.t -o a-underflow
+# RUN: llvm-objdump -d --no-show-raw-insn a-underflow | FileCheck --check-prefix=OUTRANGE %s
 
-# RUN: ld.lld a.o -T overflow-range.t -o a
-# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck --check-prefix=OUTRANGE %s
+# RUN: ld.lld a.o -T overflow-range.t -o a-overflow
+# RUN: llvm-objdump -d --no-show-raw-insn a-overflow | FileCheck --check-prefix=OUTRANGE %s
 
 # OUTRANGE:      pcalau12i $a0, 1
 # OUTRANGE-NEXT: ld.d      $a0, $a0, 0
@@ -44,7 +44,7 @@
 # UNPAIRED:         pcalau12i $a0, 2
 # UNPAIRED-NEXT:    b         8
 # UNPAIRED-NEXT:    pcalau12i $a0, 2
-# UNPAIRED:    ld.d      $a0, $a0, -2048
+# UNPAIRED:         ld.d      $a0, $a0, -2048
 
 ## Relocations do not appear in pairs, no relaxations should be applied.
 # RUN: ld.lld lone-ldr.o -T within-range.t -o lone-ldr
@@ -84,12 +84,9 @@ SECTIONS {
 SECTIONS {
  .text   0x1000: { *(.text) }
  .got    0x2000: { *(.got) }
- .rodate 0x80000800 : { *(.rodata) }  /* 0x1000+2GB-0x800 */
+ .rodata 0x80000800 : { *(.rodata) }  /* 0x1000+2GB-0x800 */
 }
 
-## This linker script ensures that .rodata and .text are sufficiently (>4GB)
-## far apart so that the pcalau12i + ld pair cannot be relaxed to pcalau12i + add.
-
 #--- a.s
 ## Symbol 'x' is nonpreemptible, the optimization should be applied.
 .rodata



More information about the llvm-commits mailing list