[lld] bd84d66 - [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (#123680)

via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 7 04:58:51 PDT 2025


Author: Zhaoxin Yang
Date: 2025-04-07T19:58:48+08:00
New Revision: bd84d66700b23132eecea71fb81d6d7378473937

URL: https://github.com/llvm/llvm-project/commit/bd84d66700b23132eecea71fb81d6d7378473937
DIFF: https://github.com/llvm/llvm-project/commit/bd84d66700b23132eecea71fb81d6d7378473937.diff

LOG: [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (#123680)

Original code sequence:
* pcalau12i $a0, %ie_pc_hi20(sym)
* ld.d           $a0, $a0, %ie_pc_lo12(sym)

The converted code sequence is as follows:
* lu12i.w   $a0, %le_hi20(sym)       # if le_hi20 != 0; otherwise NOP
* ori       $a0, src, %le_lo12(sym)  # src = $a0 if le_hi20 != 0;
                                     # otherwise src = $zero

TODO: When relaxation is enabled, the redundant NOPs can be removed. This
will be implemented in a future patch.
    
Note: In the normal or medium code model, the original code sequence with
relocations may be interleaved with other instructions, because the
converted code sequence calculates an absolute offset. However, in the
extreme code model, the first four instructions with relocations must
appear consecutively so that the current code model can be identified.

Added: 
    lld/test/ELF/loongarch-relax-tls-ie.s

Modified: 
    lld/ELF/Arch/LoongArch.cpp
    lld/ELF/Relocations.cpp
    lld/test/ELF/loongarch-tls-ie.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 4edc625b05cb0..86f1778112a32 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -39,6 +39,7 @@ class LoongArch final : public TargetInfo {
   void relocate(uint8_t *loc, const Relocation &rel,
                 uint64_t val) const override;
   bool relaxOnce(int pass) const override;
+  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
   void finalizeRelax(int passes) const override;
 };
 } // end anonymous namespace
@@ -53,6 +54,8 @@ enum Op {
   ADDI_W = 0x02800000,
   ADDI_D = 0x02c00000,
   ANDI = 0x03400000,
+  ORI = 0x03800000,
+  LU12I_W = 0x14000000,
   PCADDI = 0x18000000,
   PCADDU12I = 0x1c000000,
   LD_W = 0x28800000,
@@ -1002,6 +1005,88 @@ static bool relax(Ctx &ctx, InputSection &sec) {
   return changed;
 }
 
+// Convert TLS IE to LE in the normal or medium code model.
+// Original code sequence:
+//  * pcalau12i $a0, %ie_pc_hi20(sym)
+//  * ld.d      $a0, $a0, %ie_pc_lo12(sym)
+//
+// The code sequence converted is as follows:
+//  * lu12i.w   $a0, %le_hi20(sym)      # le_hi20 != 0, otherwise NOP
+//  * ori       $a0, src, %le_lo12(sym) # le_hi20 != 0, src = $a0,
+//                                      # otherwise,    src = $zero
+//
+// When relaxation is enabled, redundant NOPs can be removed.
+static void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
+  assert(isInt<32>(val) &&
+         "val exceeds the range of medium code model in tlsIeToLe");
+
+  bool isUInt12 = isUInt<12>(val);
+  const uint32_t currInsn = read32le(loc);
+  switch (rel.type) {
+  case R_LARCH_TLS_IE_PC_HI20:
+    if (isUInt12)
+      write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
+    else
+      write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12),
+                          0)); // lu12i.w $a0, %le_hi20
+    break;
+  case R_LARCH_TLS_IE_PC_LO12:
+    if (isUInt12)
+      write32le(loc, insn(ORI, getD5(currInsn), R_ZERO,
+                          val)); // ori $a0, $zero, %le_lo12
+    else
+      write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn),
+                          lo12(val))); // ori $a0, $a0, %le_lo12
+    break;
+  }
+}
+
+void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
+  const unsigned bits = ctx.arg.is64 ? 64 : 32;
+  uint64_t secAddr = sec.getOutputSection()->addr;
+  if (auto *s = dyn_cast<InputSection>(&sec))
+    secAddr += s->outSecOff;
+  else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
+    secAddr += ehIn->getParent()->outSecOff;
+  bool isExtreme = false;
+  const MutableArrayRef<Relocation> relocs = sec.relocs();
+  for (size_t i = 0, size = relocs.size(); i != size; ++i) {
+    Relocation &rel = relocs[i];
+    uint8_t *loc = buf + rel.offset;
+    uint64_t val = SignExtend64(
+        sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits);
+
+    switch (rel.expr) {
+    case R_RELAX_HINT:
+      continue;
+    case R_RELAX_TLS_IE_TO_LE:
+      if (rel.type == R_LARCH_TLS_IE_PC_HI20) {
+        // LoongArch does not support IE to LE optimization in the extreme code
+        // model. In this case, the relocs are as follows:
+        //
+        //  * i   -- R_LARCH_TLS_IE_PC_HI20
+        //  * i+1 -- R_LARCH_TLS_IE_PC_LO12
+        //  * i+2 -- R_LARCH_TLS_IE64_PC_LO20
+        //  * i+3 -- R_LARCH_TLS_IE64_PC_HI12
+        isExtreme =
+            (i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20);
+      }
+      if (isExtreme) {
+        rel.expr = getRelExpr(rel.type, *rel.sym, loc);
+        val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset),
+                           bits);
+        relocateNoSym(loc, rel.type, val);
+      } else {
+        tlsIeToLe(loc, rel, val);
+      }
+      continue;
+    default:
+      break;
+    }
+    relocate(loc, rel, val);
+  }
+}
+
 // When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
 // the absence of a linker script. For call and load/store R_LARCH_RELAX, code
 // shrinkage may reduce displacement and make more relocations eligible for

diff  --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index df8d2a6a5f988..81de664fd1c23 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1376,6 +1376,11 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
     return 1;
   }
 
+  // LoongArch supports IE to LE optimization in non-extreme code model.
+  bool execOptimizeInLoongArch =
+      ctx.arg.emachine == EM_LOONGARCH &&
+      (type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12);
+
   // ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
   // optimizations.
   // RISC-V supports TLSDESC to IE/LE optimizations.
@@ -1383,7 +1388,8 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
   // optimization as well.
   bool execOptimize =
       !ctx.arg.shared && ctx.arg.emachine != EM_ARM &&
-      ctx.arg.emachine != EM_HEXAGON && ctx.arg.emachine != EM_LOONGARCH &&
+      ctx.arg.emachine != EM_HEXAGON &&
+      (ctx.arg.emachine != EM_LOONGARCH || execOptimizeInLoongArch) &&
       !(isRISCV && expr != R_TLSDESC_PC && expr != R_TLSDESC_CALL) &&
       !sec->file->ppc64DisableTLSRelax;
 
@@ -1477,6 +1483,15 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
     return 1;
   }
 
+  // LoongArch TLS GD/LD relocs reuse RE_LOONGARCH_GOT, for which NEEDS_TLSIE
+  // shouldn't be set, so we check for the IE-to-LE case independently.
+  if (ctx.arg.emachine == EM_LOONGARCH && expr == RE_LOONGARCH_GOT &&
+      execOptimize && isLocalInExecutable) {
+    ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+    sec->addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
+    return 1;
+  }
+
   return 0;
 }
 

diff  --git a/lld/test/ELF/loongarch-relax-tls-ie.s b/lld/test/ELF/loongarch-relax-tls-ie.s
new file mode 100644
index 0000000000000..82e609d005aff
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-tls-ie.s
@@ -0,0 +1,70 @@
+# REQUIRES: loongarch
+## Test LA64 IE -> LE in various cases.
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t.o
+
+## FIXME: IE relaxation has not yet been implemented.
+## --relax/--no-relax has the same result. Also check --emit-relocs.
+# RUN: ld.lld --emit-relocs %t.o -o %t
+# RUN: llvm-readelf -x .got %t 2>&1 | FileCheck --check-prefix=LE-GOT %s
+# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LE %s
+
+# RUN: ld.lld --emit-relocs --no-relax %t.o -o %t.norelax
+# RUN: llvm-readelf -x .got %t.norelax 2>&1 | FileCheck --check-prefix=LE-GOT %s
+# RUN: llvm-objdump -dr --no-show-raw-insn %t.norelax | FileCheck --check-prefixes=LE %s
+
+# LE-GOT: could not find section '.got'
+
+# a@tprel = st_value(a) = 0xfff
+# b@tprel = st_value(b) = 0x1000
+# LE:      20158: nop
+# LE-NEXT:          R_LARCH_TLS_IE_PC_HI20 a
+# LE-NEXT:          R_LARCH_RELAX   *ABS*
+# LE-NEXT:        ori     $a0, $zero, 4095
+# LE-NEXT:          R_LARCH_TLS_IE_PC_LO12 a
+# LE-NEXT:          R_LARCH_RELAX   *ABS*
+# LE-NEXT:        add.d   $a0, $a0, $tp
+# LE-NEXT: 20164: lu12i.w $a1, 1
+# LE-NEXT:          R_LARCH_TLS_IE_PC_HI20 b
+# LE-NEXT:        ori     $a1, $a1, 0
+# LE-NEXT:          R_LARCH_TLS_IE_PC_LO12 b
+# LE-NEXT:        add.d   $a1, $a1, $tp
+# LE-NEXT: 20170: nop
+# LE-NEXT:          R_LARCH_TLS_IE_PC_HI20 a
+# LE-NEXT:          R_LARCH_RELAX   *ABS*
+# LE-NEXT:        lu12i.w $a3, 1
+# LE-NEXT:          R_LARCH_TLS_IE_PC_HI20 b
+# LE-NEXT:          R_LARCH_RELAX   *ABS*
+# LE-NEXT:        ori     $a2, $zero, 4095
+# LE-NEXT:          R_LARCH_TLS_IE_PC_LO12 a
+# LE-NEXT:        ori     $a3, $a3, 0
+# LE-NEXT:          R_LARCH_TLS_IE_PC_LO12 b
+# LE-NEXT:        add.d   $a2, $a2, $tp
+# LE-NEXT:        add.d   $a3, $a3, $tp
+
+la.tls.ie $a0, a    # relax
+add.d $a0, $a0, $tp
+
+# PCALAU12I does not have R_LARCH_RELAX. No relaxation.
+pcalau12i $a1, %ie_pc_hi20(b)
+ld.d $a1, $a1, %ie_pc_lo12(b)
+add.d $a1, $a1, $tp
+
+# Test instructions are interleaved.
+# PCALAU12I has an R_LARCH_RELAX. We perform relaxation.
+pcalau12i $a2, %ie_pc_hi20(a)
+.reloc .-4, R_LARCH_RELAX, 0
+pcalau12i $a3, %ie_pc_hi20(b)
+.reloc .-4, R_LARCH_RELAX, 0
+ld.d $a2, $a2, %ie_pc_lo12(a)
+ld.d $a3, $a3, %ie_pc_lo12(b)
+add.d $a2, $a2, $tp
+add.d $a3, $a3, $tp
+
+.section .tbss,"awT",@nobits
+.globl a
+.zero 0xfff ## Place a at 0xfff, LE needs only one ins.
+a:
+.zero 1  ## Place b at 0x1000, LE needs two ins.
+b:
+.zero 4

diff  --git a/lld/test/ELF/loongarch-tls-ie.s b/lld/test/ELF/loongarch-tls-ie.s
index 78c207991b4e6..ddfd9c976cb9b 100644
--- a/lld/test/ELF/loongarch-tls-ie.s
+++ b/lld/test/ELF/loongarch-tls-ie.s
@@ -12,7 +12,7 @@
 ## LA32 IE -> LE
 # RUN: ld.lld %t/32.o -o %t/32
 # RUN: llvm-readelf -r %t/32 | FileCheck --check-prefix=NOREL %s
-# RUN: llvm-readelf -x .got %t/32 | FileCheck --check-prefix=LE32-GOT %s
+# RUN: llvm-readelf -x .got %t/32 2>&1 | FileCheck --check-prefix=LE32-GOT %s
 # RUN: llvm-objdump -d --no-show-raw-insn %t/32 | FileCheck --check-prefixes=LE32 %s
 
 ## LA64 IE
@@ -23,7 +23,7 @@
 ## LA64 IE -> LE
 # RUN: ld.lld %t/64.o -o %t/64
 # RUN: llvm-readelf -r %t/64 | FileCheck --check-prefix=NOREL %s
-# RUN: llvm-readelf -x .got %t/64 | FileCheck --check-prefix=LE64-GOT %s
+# RUN: llvm-readelf -x .got %t/64 2>&1 | FileCheck --check-prefix=LE64-GOT %s
 # RUN: llvm-objdump -d --no-show-raw-insn %t/64 | FileCheck --check-prefixes=LE64 %s
 
 # IE32-REL:      FLAGS STATIC_TLS
@@ -62,29 +62,23 @@
 
 # a at tprel = st_value(a) = 0x8
 # b at tprel = st_value(a) = 0xc
-# LE32-GOT: section '.got':
-# LE32-GOT-NEXT: 0x0003012c 08000000 0c000000
-# LE64-GOT: section '.got':
-# LE64-GOT-NEXT: 0x000301e0 08000000 00000000 0c000000 00000000
+# LE32-GOT: could not find section '.got'
+# LE64-GOT: could not find section '.got'
 
 ## LA32:
-## &.got[0] - . = 0x3012c - 0x20114: 0x10 pages, page offset 0x12c
-## &.got[1] - . = 0x30130 - 0x20120: 0x10 pages, page offset 0x130
-# LE32:      20114: pcalau12i $a4, 16
-# LE32-NEXT:        ld.w $a4, $a4, 300
+# LE32:      200d4: nop
+# LE32-NEXT:        ori $a4, $zero, 8
 # LE32-NEXT:        add.w $a4, $a4, $tp
-# LE32-NEXT: 20120: pcalau12i $a5, 16
-# LE32-NEXT:        ld.w $a5, $a5, 304
+# LE32-NEXT: 200e0: nop
+# LE32-NEXT:        ori $a5, $zero, 12
 # LE32-NEXT:        add.w $a5, $a5, $tp
 
 ## LA64:
-## &.got[0] - . = 0x301e0 - 0x201c8: 0x10 pages, page offset 0x1e0
-## &.got[1] - . = 0x301e8 - 0x201d4: 0x10 pages, page offset 0x1e8
-# LE64:      201c8: pcalau12i $a4, 16
-# LE64-NEXT:        ld.d $a4, $a4, 480
+# LE64:      20158: nop
+# LE64-NEXT:        ori $a4, $zero, 8
 # LE64-NEXT:        add.d $a4, $a4, $tp
-# LE64-NEXT: 201d4: pcalau12i $a5, 16
-# LE64-NEXT:        ld.d $a5, $a5, 488
+# LE64-NEXT: 20164: nop
+# LE64-NEXT:        ori $a5, $zero, 12
 # LE64-NEXT:        add.d $a5, $a5, $tp
 
 #--- 32.s


        


More information about the llvm-commits mailing list