[llvm-branch-commits] [lld] [lld][LoongArch] GOT indirection to PC relative optimization. (PR #123743)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jan 21 04:23:29 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld

Author: Zhaoxin Yang (ylzsx)

<details>
<summary>Changes</summary>

In LoongArch, this optimization is only supported when relaxation is enabled.
From:
* pcalau12i $a0, %got_pc_hi20(sym_got)
* ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
To:
* pcalau12i $a0, %pc_hi20(sym)
* addi.w/d $a0, $a0, %pc_lo12(sym)

If the original code sequence can be relaxed into a single instruction `pcaddi`, this patch will not be taken (see https://github.com/llvm/llvm-project/pull/123566).
The implementation related to `got` is split into two locations because the `relax()` function is part of an iteration fixed-point algorithm. We should minimize it to achieve better linker performance.
    
FIXME: Althouth the optimization has been performed, the GOT entries still exists, similarly to AArch64. Eliminating the entries may be require additional marking in the common code.

---
Full diff: https://github.com/llvm/llvm-project/pull/123743.diff


2 Files Affected:

- (modified) lld/ELF/Arch/LoongArch.cpp (+70) 
- (modified) lld/test/ELF/loongarch-relax-pc-hi20-lo12.s (+6-4) 


``````````diff
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 5f49b23e8ffb1a..6ae45e109a6dec 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -47,6 +47,8 @@ class LoongArch final : public TargetInfo {
   void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
   void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
   void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
+  bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
+                     const Relocation &rLo12, uint64_t secAddr) const;
 };
 } // end anonymous namespace
 
@@ -1150,6 +1152,58 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
   }
 }
 
+// Try GOT indirection to PC relative optimization when relaxation is enabled.
+// From:
+//  * pcalau12i $a0, %got_pc_hi20(sym_got)
+//  * ld.w/d    $a0, $a0, %got_pc_lo12(sym_got)
+// To:
+//  * pcalau12i $a0, %pc_hi20(sym)
+//  * addi.w/d  $a0, $a0, %pc_lo12(sym)
+//
+// FIXME: Althouth the optimization has been performed, the GOT entries still
+// exists, similarly to AArch64. Eliminating the entries may be require
+// additional marking in the common code.
+bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
+                              const Relocation &rLo12, uint64_t secAddr) const {
+  if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||
+      rHi20.sym->isGnuIFunc() ||
+      (ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section))
+    return false;
+
+  Symbol &sym = *rHi20.sym;
+  uint64_t symLocal = sym.getVA(ctx) + rHi20.addend;
+  // Check if the address difference is within +/-2GB range.
+  // For simplicity, the range mentioned here is an approximate estimate and is
+  // not fully equivalent to the entire region that PC-relative addressing can
+  // cover.
+  int64_t pageOffset =
+      getLoongArchPage(symLocal) - getLoongArchPage(secAddr + rHi20.offset);
+  if (!isInt<20>(pageOffset >> 12))
+    return false;
+
+  Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
+                         rHi20.addend, &sym};
+  Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
+                         &sym};
+
+  const uint32_t currInsn = read32le(loc);
+  const uint32_t nextInsn = read32le(loc + 4);
+  // Check if use the same register.
+  if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn))
+    return false;
+
+  uint64_t pageDelta =
+      getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type);
+  // pcalau12i $a0, %pc_hi20
+  write32le(loc, insn(PCALAU12I, getD5(currInsn), 0, 0));
+  relocate(loc, newRHi20, pageDelta);
+  // addi.w/d $a0, $a0, %pc_lo12
+  write32le(loc + 4, insn(ctx.arg.is64 ? ADDI_D : ADDI_W, getD5(nextInsn),
+                          getJ5(nextInsn), 0));
+  relocate(loc + 4, newRLo12, SignExtend64(symLocal, 64));
+  return true;
+}
+
 // During TLSDESC GD_TO_IE, the converted code sequence always includes an
 // instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
 // in `getRelocTargetVA`, expr of this instruction should be adjusted to
@@ -1259,6 +1313,22 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
         tlsdescToLe(loc, rel, val);
       }
       continue;
+    case RE_LOONGARCH_GOT_PAGE_PC:
+      // In LoongArch, we try GOT indirection to PC relative optimization only
+      // when relaxation is enabled. This approach avoids determining whether
+      // relocation types are paired and whether the destination register of
+      // pcalau12i is only used by the immediately following instruction.
+      // Moreover, if the original code sequence can be relaxed to a single
+      // instruction `pcaddi`, the first instruction will be removed and it will
+      // not reach here.
+      if (isPairRelaxable(relocs, i) && rel.type == R_LARCH_GOT_PC_HI20 &&
+          relocs[i + 2].type == R_LARCH_GOT_PC_LO12 &&
+          tryGotToPCRel(loc, rel, relocs[i + 2], secAddr)) {
+        i = i + 3; // skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12,
+                   // R_LARCH_RELAX
+        continue;
+      }
+      break;
     default:
       break;
     }
diff --git a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
index 760fe77d774e30..ae3b29e14fb3c1 100644
--- a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
+++ b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
@@ -30,24 +30,26 @@
 ## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
 # NORELAX32-NEXT:  10000:  pcalau12i     $a0, 1024
 # NORELAX32-NEXT:          addi.w        $a0, $a0, 0
+## Not relaxation, convertion to PCRel.
 # NORELAX32-NEXT:          pcalau12i     $a0, 1024
-# NORELAX32-NEXT:          ld.w          $a0, $a0, 4
+# NORELAX32-NEXT:          addi.w        $a0, $a0, 0
 # NORELAX32-NEXT:          pcalau12i     $a0, 1024
 # NORELAX32-NEXT:          addi.w        $a0, $a0, 0
 # NORELAX32-NEXT:          pcalau12i     $a0, 1024
-# NORELAX32-NEXT:          ld.w          $a0, $a0, 4
+# NORELAX32-NEXT:          addi.w        $a0, $a0, 0
 
 # NORELAX64-LABEL: <_start>:
 ## offset exceed range of pcaddi
 ## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
 # NORELAX64-NEXT:  10000:  pcalau12i     $a0, 1024
 # NORELAX64-NEXT:          addi.d        $a0, $a0, 0
+## Not relaxation, convertion to PCRel.
 # NORELAX64-NEXT:          pcalau12i     $a0, 1024
-# NORELAX64-NEXT:          ld.d          $a0, $a0, 8
+# NORELAX64-NEXT:          addi.d        $a0, $a0, 0
 # NORELAX64-NEXT:          pcalau12i     $a0, 1024
 # NORELAX64-NEXT:          addi.d        $a0, $a0, 0
 # NORELAX64-NEXT:          pcalau12i     $a0, 1024
-# NORELAX64-NEXT:          ld.d          $a0, $a0, 8
+# NORELAX64-NEXT:          addi.d        $a0, $a0, 0
 
 .section .text
 .global _start

``````````

</details>


https://github.com/llvm/llvm-project/pull/123743


More information about the llvm-branch-commits mailing list