[lld] [ELF] Add target-specific relocation scanning for SystemZ (PR #181563)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 16 12:03:07 PST 2026


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/181563

>From aa759cd95f33a42c2446ebf98476e8dd6e8037b8 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Sun, 15 Feb 2026 00:09:07 -0800
Subject: [PATCH 1/2] [ELF] Add target-specific relocation scanning for SystemZ

Implement SystemZ::scanSectionImpl, following the pattern established
for x86 (#178846) and PPC64 (#181496). This merges the getRelExpr and
TLS handling for SHF_ALLOC sections into the target-specific scanner,
enabling devirtualization and eliminating abstraction overhead.

- Inline relocation classification into scanSectionImpl with a switch
  on relocation type, replacing the generic `rs.scan()` path.
- Use processR_PC/processR_PLT_PC for common PC-relative and PLT
  relocations.
- Handle TLS GD, LD, and DTPREL directly, eliminating
  handleTlsRelocation, getTlsGdRelaxSkip, and adjustTlsExpr overrides.
  Replace R_RELAX_TLS_GD_TO_IE_GOT_OFF with R_GOT_OFF and
  R_RELAX_TLS_GD_TO_LE/R_RELAX_TLS_LD_TO_LE with R_TPREL, using
  type-based dispatch in relocate() for marker relocation types.
- Handle TLS IE inline without IE-to-LE optimization. The generic
  `handleTlsIe` helper cannot be used here.
- Remove `sortRels`: instead of sorting relocations to process GDCALL
  before PLT32DBL, skip PLT32DBL by peeking ahead at the next
  relocation to check for a TLS marker (GDCALL/LDCALL).
  This fixes SHT_CREL input (the CREL overload of `sortRels` returned an
  empty range), as an alternative to #149640.
- Simplify getRelExpr to only handle relocations needed by
  relocateNonAlloc and .eh_frame.
---
 lld/ELF/Arch/SystemZ.cpp   | 474 +++++++++++++++++++------------------
 lld/ELF/Relocations.cpp    |  10 +-
 lld/ELF/Relocations.h      |  21 --
 lld/test/ELF/systemz-plt.s |   2 +-
 4 files changed, 253 insertions(+), 254 deletions(-)

diff --git a/lld/ELF/Arch/SystemZ.cpp b/lld/ELF/Arch/SystemZ.cpp
index a9125806c0952..6171ff5b4570a 100644
--- a/lld/ELF/Arch/SystemZ.cpp
+++ b/lld/ELF/Arch/SystemZ.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "OutputSections.h"
+#include "RelocScan.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -23,7 +24,6 @@ namespace {
 class SystemZ : public TargetInfo {
 public:
   SystemZ(Ctx &);
-  int getTlsGdRelaxSkip(RelType type) const override;
   RelExpr getRelExpr(RelType type, const Symbol &s,
                      const uint8_t *loc) const override;
   RelType getDynRel(RelType type) const override;
@@ -34,7 +34,9 @@ class SystemZ : public TargetInfo {
   void addPltHeaderSymbols(InputSection &isd) const override;
   void writePlt(uint8_t *buf, const Symbol &sym,
                 uint64_t pltEntryAddr) const override;
-  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
+  template <class ELFT, class RelTy>
+  void scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels);
+  void scanSection(InputSectionBase &sec) override;
   RelExpr adjustGotPcExpr(RelType type, int64_t addend,
                           const uint8_t *loc) const override;
   bool relaxOnce(int pass) const override;
@@ -44,9 +46,7 @@ class SystemZ : public TargetInfo {
 
 private:
   void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const;
-  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
-  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
-  void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
+  void relaxTlsGdCall(uint8_t *loc, const Relocation &rel) const;
 };
 } // namespace
 
@@ -79,95 +79,19 @@ SystemZ::SystemZ(Ctx &ctx) : TargetInfo(ctx) {
   defaultImageBase = 0x1000000;
 }
 
+// Only handles relocations used by relocateNonAlloc and scanEhSection.
+// Allocatable sections are handled by scanSectionImpl.
 RelExpr SystemZ::getRelExpr(RelType type, const Symbol &s,
                             const uint8_t *loc) const {
   switch (type) {
   case R_390_NONE:
     return R_NONE;
-  // Relocations targeting the symbol value.
-  case R_390_8:
-  case R_390_12:
-  case R_390_16:
-  case R_390_20:
   case R_390_32:
   case R_390_64:
     return R_ABS;
-  case R_390_PC16:
   case R_390_PC32:
   case R_390_PC64:
-  case R_390_PC12DBL:
-  case R_390_PC16DBL:
-  case R_390_PC24DBL:
-  case R_390_PC32DBL:
     return R_PC;
-  case R_390_GOTOFF16:
-  case R_390_GOTOFF: // a.k.a. R_390_GOTOFF32
-  case R_390_GOTOFF64:
-    return R_GOTREL;
-  // Relocations targeting the PLT associated with the symbol.
-  case R_390_PLT32:
-  case R_390_PLT64:
-  case R_390_PLT12DBL:
-  case R_390_PLT16DBL:
-  case R_390_PLT24DBL:
-  case R_390_PLT32DBL:
-    return R_PLT_PC;
-  case R_390_PLTOFF16:
-  case R_390_PLTOFF32:
-  case R_390_PLTOFF64:
-    return R_PLT_GOTREL;
-  // Relocations targeting the GOT entry associated with the symbol.
-  case R_390_GOTENT:
-    return R_GOT_PC;
-  case R_390_GOT12:
-  case R_390_GOT16:
-  case R_390_GOT20:
-  case R_390_GOT32:
-  case R_390_GOT64:
-    return R_GOT_OFF;
-  // Relocations targeting the GOTPLT entry associated with the symbol.
-  case R_390_GOTPLTENT:
-    return R_GOTPLT_PC;
-  case R_390_GOTPLT12:
-  case R_390_GOTPLT16:
-  case R_390_GOTPLT20:
-  case R_390_GOTPLT32:
-  case R_390_GOTPLT64:
-    return R_GOTPLT_GOTREL;
-  // Relocations targeting _GLOBAL_OFFSET_TABLE_.
-  case R_390_GOTPC:
-  case R_390_GOTPCDBL:
-    return R_GOTONLY_PC;
-  // TLS-related relocations.
-  case R_390_TLS_LOAD:
-    return R_NONE;
-  case R_390_TLS_GDCALL:
-    return R_TLSGD_PC;
-  case R_390_TLS_LDCALL:
-    return R_TLSLD_PC;
-  case R_390_TLS_GD32:
-  case R_390_TLS_GD64:
-    return R_TLSGD_GOT;
-  case R_390_TLS_LDM32:
-  case R_390_TLS_LDM64:
-    return R_TLSLD_GOT;
-  case R_390_TLS_LDO32:
-  case R_390_TLS_LDO64:
-    return R_DTPREL;
-  case R_390_TLS_LE32:
-  case R_390_TLS_LE64:
-    return R_TPREL;
-  case R_390_TLS_IE32:
-  case R_390_TLS_IE64:
-    return R_GOT;
-  case R_390_TLS_GOTIE12:
-  case R_390_TLS_GOTIE20:
-  case R_390_TLS_GOTIE32:
-  case R_390_TLS_GOTIE64:
-    return R_GOT_OFF;
-  case R_390_TLS_IEENT:
-    return R_GOT_PC;
-
   default:
     Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
              << ") against symbol " << &s;
@@ -265,150 +189,244 @@ int64_t SystemZ::getImplicitAddend(const uint8_t *buf, RelType type) const {
   }
 }
 
-RelType SystemZ::getDynRel(RelType type) const {
-  if (type == R_390_64 || type == R_390_PC64)
-    return type;
-  return R_390_NONE;
-}
+template <class ELFT, class RelTy>
+void SystemZ::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
+  RelocScan rs(ctx, &sec);
+  sec.relocations.reserve(rels.size());
+
+  for (auto it = rels.begin(); it != rels.end(); ++it) {
+    RelType type = it->getType(false);
+
+    // The assembler emits R_390_PLT32DBL (at the displacement field) before
+    // R_390_TLS_GDCALL/LDCALL (at the instruction start) for the same brasl.
+    // When optimizing TLS, skip PLT32DBL before maybeReportUndefined would
+    // flag __tls_get_offset as undefined.
+    if (type == R_390_PLT32DBL && !ctx.arg.shared &&
+        std::next(it) != rels.end()) {
+      RelType nextType = std::next(it)->getType(false);
+      if (nextType == R_390_TLS_GDCALL || nextType == R_390_TLS_LDCALL)
+        continue;
+    }
 
-RelExpr SystemZ::adjustTlsExpr(RelType type, RelExpr expr) const {
-  if (expr == R_RELAX_TLS_GD_TO_IE)
-    return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
-  return expr;
-}
+    uint32_t symIdx = it->getSymbol(false);
+    Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx);
+    uint64_t offset = it->r_offset;
+    if (sym.isUndefined() && symIdx != 0 &&
+        rs.maybeReportUndefined(cast<Undefined>(sym), offset))
+      continue;
+    int64_t addend = rs.getAddend<ELFT>(*it, type);
+
+    RelExpr expr;
+    // Relocation types that only need a RelExpr set `expr` and break out of
+    // the switch to reach rs.process(). Types that need special handling
+    // (fast-path helpers, TLS) call a handler and use `continue`.
+    switch (type) {
+    case R_390_NONE:
+    case R_390_TLS_LOAD:
+      continue;
 
-int SystemZ::getTlsGdRelaxSkip(RelType type) const {
-  // A __tls_get_offset call instruction is marked with 2 relocations:
-  //
-  //   R_390_TLS_GDCALL / R_390_TLS_LDCALL: marker relocation
-  //   R_390_PLT32DBL: __tls_get_offset
-  //
-  // After the relaxation we no longer call __tls_get_offset and should skip
-  // both relocations to not create a false dependence on __tls_get_offset
-  // being defined.
-  //
-  // Note that this mechanism only works correctly if the R_390_TLS_[GL]DCALL
-  // is seen immediately *before* the R_390_PLT32DBL.  Unfortunately, current
-  // compilers on the platform will typically generate the inverse sequence.
-  // To fix this, we sort relocations by offset in RelocationScanner::scan;
-  // this ensures the correct sequence as the R_390_TLS_[GL]DCALL applies to
-  // the first byte of the brasl instruction, while the R_390_PLT32DBL applies
-  // to its third byte (the relative displacement).
-
-  if (type == R_390_TLS_GDCALL || type == R_390_TLS_LDCALL)
-    return 2;
-  return 1;
-}
+    // Absolute relocations:
+    case R_390_8:
+    case R_390_12:
+    case R_390_16:
+    case R_390_20:
+    case R_390_32:
+    case R_390_64:
+      expr = R_ABS;
+      break;
+
+    // PC-relative relocations:
+    case R_390_PC16:
+    case R_390_PC32:
+    case R_390_PC64:
+    case R_390_PC12DBL:
+    case R_390_PC16DBL:
+    case R_390_PC24DBL:
+    case R_390_PC32DBL:
+      rs.processR_PC(type, offset, addend, sym);
+      continue;
 
-void SystemZ::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
-                             uint64_t val) const {
-  // The general-dynamic code sequence for a global `x`:
-  //
-  // Instruction                      Relocation       Symbol
-  // ear %rX,%a0
-  // sllg %rX,%rX,32
-  // ear %rX,%a1
-  // larl %r12,_GLOBAL_OFFSET_TABLE_  R_390_GOTPCDBL   _GLOBAL_OFFSET_TABLE_
-  // lgrl %r2,.LC0                    R_390_PC32DBL    .LC0
-  // brasl %r14,__tls_get_offset@plt  R_390_TLS_GDCALL x
-  //            :tls_gdcall:x         R_390_PLT32DBL   __tls_get_offset
-  // la %r2,0(%r2,%rX)
-  //
-  // .LC0:
-  // .quad   x@TLSGD                  R_390_TLS_GD64   x
-  //
-  // Relaxing to initial-exec entails:
-  // 1) Replacing the call by a load from the GOT.
-  // 2) Replacing the relocation on the constant LC0 by R_390_TLS_GOTIE64.
+    // PLT-generating relocations:
+    case R_390_PLT32:
+    case R_390_PLT64:
+    case R_390_PLT12DBL:
+    case R_390_PLT16DBL:
+    case R_390_PLT24DBL:
+    case R_390_PLT32DBL:
+      rs.processR_PLT_PC(type, offset, addend, sym);
+      continue;
+    case R_390_PLTOFF16:
+    case R_390_PLTOFF32:
+    case R_390_PLTOFF64:
+      expr = R_PLT_GOTREL;
+      break;
+
+    // GOT-generating relocations:
+    case R_390_GOTOFF16:
+    case R_390_GOTOFF: // a.k.a. R_390_GOTOFF32
+    case R_390_GOTOFF64:
+      ctx.in.got->hasGotOffRel.store(true, std::memory_order_relaxed);
+      expr = R_GOTREL;
+      break;
+    case R_390_GOTENT:
+      expr = R_GOT_PC;
+      break;
+    case R_390_GOT12:
+    case R_390_GOT16:
+    case R_390_GOT20:
+    case R_390_GOT32:
+    case R_390_GOT64:
+      expr = R_GOT_OFF;
+      break;
+
+    case R_390_GOTPLTENT:
+      expr = R_GOTPLT_PC;
+      break;
+    case R_390_GOTPLT12:
+    case R_390_GOTPLT16:
+    case R_390_GOTPLT20:
+    case R_390_GOTPLT32:
+    case R_390_GOTPLT64:
+      expr = R_GOTPLT_GOTREL;
+      break;
+    case R_390_GOTPC:
+    case R_390_GOTPCDBL:
+      ctx.in.got->hasGotOffRel.store(true, std::memory_order_relaxed);
+      expr = R_GOTONLY_PC;
+      break;
+
+    // TLS relocations:
+    case R_390_TLS_LE32:
+    case R_390_TLS_LE64:
+      if (rs.checkTlsLe(offset, sym, type))
+        continue;
+      expr = R_TPREL;
+      break;
+    case R_390_TLS_IE32:
+    case R_390_TLS_IE64:
+      // There is no IE to LE optimization.
+      ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+      sym.setFlags(NEEDS_TLSIE);
+      // R_GOT (absolute GOT address) needs a RELATIVE dynamic relocation
+      // in PIC.
+      if (ctx.arg.isPic)
+        sec.getPartition(ctx).relaDyn->addRelativeReloc(
+            ctx.target->relativeRel, sec, offset, sym, addend, type, R_GOT);
+      else
+        sec.addReloc({R_GOT, type, offset, addend, &sym});
+      continue;
+    case R_390_TLS_GOTIE12:
+    case R_390_TLS_GOTIE20:
+    case R_390_TLS_GOTIE32:
+    case R_390_TLS_GOTIE64:
+      ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+      sym.setFlags(NEEDS_TLSIE);
+      sec.addReloc({R_GOT_OFF, type, offset, addend, &sym});
+      continue;
+    case R_390_TLS_IEENT:
+      ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+      sym.setFlags(NEEDS_TLSIE);
+      sec.addReloc({R_GOT_PC, type, offset, addend, &sym});
+      continue;
+    case R_390_TLS_GDCALL:
+      // Use dummy R_ABS for `sharedExpr` (no optimization), which is a no-op in
+      // relocate().
+      rs.handleTlsGd(R_ABS, R_GOT_OFF, R_TPREL, type, offset, addend, sym);
+      continue;
+    case R_390_TLS_GD32:
+    case R_390_TLS_GD64:
+      rs.handleTlsGd(R_TLSGD_GOT, R_GOT_OFF, R_TPREL, type, offset, addend,
+                     sym);
+      continue;
 
-  switch (rel.type) {
-  case R_390_TLS_GDCALL:
-    // brasl %r14,__tls_get_offset@plt -> lg %r2,0(%r2,%r12)
-    write16be(loc, 0xe322);
-    write32be(loc + 2, 0xc0000004);
-    break;
-  case R_390_TLS_GD64:
-    relocateNoSym(loc, R_390_TLS_GOTIE64, val);
-    break;
-  default:
-    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
+    case R_390_TLS_LDCALL:
+      // Use dummy R_ABS for `sharedExpr` (no optimization), which is a no-op in
+      // relocate().
+      rs.handleTlsLd(R_ABS, type, offset, addend, sym);
+      continue;
+    // TLS LD GOT relocations:
+    case R_390_TLS_LDM32:
+    case R_390_TLS_LDM64:
+      rs.handleTlsLd(R_TLSLD_GOT, type, offset, addend, sym);
+      continue;
+    // TLS DTPREL relocations:
+    case R_390_TLS_LDO32:
+    case R_390_TLS_LDO64:
+      if (ctx.arg.shared)
+        sec.addReloc({R_DTPREL, type, offset, addend, &sym});
+      else
+        sec.addReloc({R_TPREL, type, offset, addend, &sym});
+      continue;
+
+    default:
+      Err(ctx) << getErrorLoc(ctx, sec.content().data() + offset)
+               << "unknown relocation (" << type.v << ") against symbol "
+               << &sym;
+      continue;
+    }
+    rs.process(expr, type, offset, sym, addend);
   }
 }
 
-void SystemZ::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
-                             uint64_t val) const {
-  // The general-dynamic code sequence for a global `x`:
-  //
-  // Instruction                      Relocation       Symbol
-  // ear %rX,%a0
-  // sllg %rX,%rX,32
-  // ear %rX,%a1
-  // larl %r12,_GLOBAL_OFFSET_TABLE_  R_390_GOTPCDBL   _GLOBAL_OFFSET_TABLE_
-  // lgrl %r2,.LC0                    R_390_PC32DBL    .LC0
-  // brasl %r14,__tls_get_offset@plt  R_390_TLS_GDCALL x
-  //            :tls_gdcall:x         R_390_PLT32DBL   __tls_get_offset
-  // la %r2,0(%r2,%rX)
-  //
-  // .LC0:
-  // .quad   x@tlsgd                  R_390_TLS_GD64   x
-  //
-  // Relaxing to local-exec entails:
-  // 1) Replacing the call by a nop.
-  // 2) Replacing the relocation on the constant LC0 by R_390_TLS_LE64.
-
-  switch (rel.type) {
-  case R_390_TLS_GDCALL:
-    // brasl %r14,__tls_get_offset@plt -> brcl 0,.
-    write16be(loc, 0xc004);
-    write32be(loc + 2, 0x00000000);
-    break;
-  case R_390_TLS_GD64:
-    relocateNoSym(loc, R_390_TLS_LE64, val);
-    break;
-  default:
-    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
-  }
+void SystemZ::scanSection(InputSectionBase &sec) {
+  elf::scanSection1<SystemZ, ELF64BE>(*this, sec);
 }
 
-void SystemZ::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
-                             uint64_t val) const {
-  // The local-dynamic code sequence for a global `x`:
-  //
-  // Instruction                      Relocation       Symbol
-  // ear %rX,%a0
-  // sllg %rX,%rX,32
-  // ear %rX,%a1
-  // larl %r12,_GLOBAL_OFFSET_TABLE_  R_390_GOTPCDBL   _GLOBAL_OFFSET_TABLE_
-  // lgrl %r2,.LC0                    R_390_PC32DBL    .LC0
-  // brasl %r14,__tls_get_offset@plt  R_390_TLS_LDCALL <sym>
-  //            :tls_ldcall:<sym>     R_390_PLT32DBL   __tls_get_offset
-  // la %r2,0(%r2,%rX)
-  // lgrl %rY,.LC1                    R_390_PC32DBL    .LC1
-  // la %r2,0(%r2,%rY)
-  //
-  // .LC0:
-  // .quad   <sym>@tlsldm             R_390_TLS_LDM64  <sym>
-  // .LC1:
-  // .quad   x@dtpoff                 R_390_TLS_LDO64  x
-  //
-  // Relaxing to local-exec entails:
-  // 1) Replacing the call by a nop.
-  // 2) Replacing the constant LC0 by 0 (i.e. ignoring the relocation).
-  // 3) Replacing the relocation on the constant LC1 by R_390_TLS_LE64.
+RelType SystemZ::getDynRel(RelType type) const {
+  if (type == R_390_64 || type == R_390_PC64)
+    return type;
+  return R_390_NONE;
+}
 
-  switch (rel.type) {
-  case R_390_TLS_LDCALL:
+// Rewrite the brasl instruction at loc for TLS GD/LD optimization.
+//
+// The general-dynamic code sequence for a global `x`:
+//
+// Instruction                      Relocation       Symbol
+// ear %rX,%a0
+// sllg %rX,%rX,32
+// ear %rX,%a1
+// larl %r12,_GLOBAL_OFFSET_TABLE_  R_390_GOTPCDBL   _GLOBAL_OFFSET_TABLE_
+// lgrl %r2,.LC0                    R_390_PC32DBL    .LC0
+// brasl %r14,__tls_get_offset@plt  R_390_TLS_GDCALL x
+//            :tls_gdcall:x         R_390_PLT32DBL   __tls_get_offset
+// la %r2,0(%r2,%rX)
+//
+// .LC0:
+// .quad   x@TLSGD                  R_390_TLS_GD64   x
+//
+// GD -> IE: replacing the call by a GOT load and LC0 by R_390_TLS_GOTIE64.
+// GD -> LE: replacing the call by a nop and LC0 by R_390_TLS_LE64.
+//
+// The local-dynamic code sequence for a global `x`:
+//
+// Instruction                      Relocation       Symbol
+// ear %rX,%a0
+// sllg %rX,%rX,32
+// ear %rX,%a1
+// larl %r12,_GLOBAL_OFFSET_TABLE_  R_390_GOTPCDBL   _GLOBAL_OFFSET_TABLE_
+// lgrl %r2,.LC0                    R_390_PC32DBL    .LC0
+// brasl %r14,__tls_get_offset@plt  R_390_TLS_LDCALL <sym>
+//            :tls_ldcall:<sym>     R_390_PLT32DBL   __tls_get_offset
+// la %r2,0(%r2,%rX)
+// lgrl %rY,.LC1                    R_390_PC32DBL    .LC1
+// la %r2,0(%r2,%rY)
+//
+// .LC0:
+// .quad   <sym>@tlsldm             R_390_TLS_LDM64  <sym>
+// .LC1:
+// .quad   x@dtpoff                 R_390_TLS_LDO64  x
+//
+// LD -> LE: replacing the call by a nop, LC0 by 0, LC1 by R_390_TLS_LE64.
+void SystemZ::relaxTlsGdCall(uint8_t *loc, const Relocation &rel) const {
+  if (rel.expr == R_GOT_OFF) {
+    // brasl %r14,__tls_get_offset@plt -> lg %r2,0(%r2,%r12)
+    write16be(loc, 0xe322);
+    write32be(loc + 2, 0xc0000004);
+  } else {
     // brasl %r14,__tls_get_offset@plt -> brcl 0,.
     write16be(loc, 0xc004);
     write32be(loc + 2, 0x00000000);
-    break;
-  case R_390_TLS_LDM64:
-    break;
-  case R_390_TLS_LDO64:
-    relocateNoSym(loc, R_390_TLS_LE64, val);
-    break;
-  default:
-    llvm_unreachable("unsupported relocation for TLS LD to LE relaxation");
   }
 }
 
@@ -477,18 +495,28 @@ void SystemZ::relaxGot(uint8_t *loc, const Relocation &rel,
 
 void SystemZ::relocate(uint8_t *loc, const Relocation &rel,
                        uint64_t val) const {
-  switch (rel.expr) {
-  case R_RELAX_GOT_PC:
+  if (rel.expr == R_RELAX_GOT_PC)
     return relaxGot(loc, rel, val);
-  case R_RELAX_TLS_GD_TO_IE_GOT_OFF:
-    return relaxTlsGdToIe(loc, rel, val);
-  case R_RELAX_TLS_GD_TO_LE:
-    return relaxTlsGdToLe(loc, rel, val);
-  case R_RELAX_TLS_LD_TO_LE:
-    return relaxTlsLdToLe(loc, rel, val);
+
+  // Handle TLS optimizations. GDCALL/LDCALL: rewrite the brasl instruction
+  // and return. LDM slots are zeroed when relaxed to LE. Other TLS data slot
+  // types (GD32/GD64, LDO) fall through to the normal type-based switch below.
+  switch (rel.type) {
+  case R_390_TLS_GDCALL:
+  case R_390_TLS_LDCALL:
+    if (rel.expr == R_ABS) // Shared: no optimization.
+      return;
+    relaxTlsGdCall(loc, rel);
+    return;
+  case R_390_TLS_LDM32:
+  case R_390_TLS_LDM64:
+    if (rel.expr == R_TPREL)
+      return; // LD -> LE: slot stays 0.
+    break;
   default:
     break;
   }
+
   switch (rel.type) {
   case R_390_8:
     checkIntUInt(ctx, loc, val, 8, rel);
@@ -590,8 +618,6 @@ void SystemZ::relocate(uint8_t *loc, const Relocation &rel,
     write64be(loc, val);
     break;
   case R_390_TLS_LOAD:
-  case R_390_TLS_GDCALL:
-  case R_390_TLS_LDCALL:
     break;
   default:
     llvm_unreachable("unknown relocation");
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 1827e4800cd7f..aa460ecdd6bb9 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1294,8 +1294,8 @@ unsigned RelocScan::handleTlsRelocation(RelExpr expr, RelType type,
             RE_LOONGARCH_GOT_PAGE_PC, R_GOT_OFF, R_TLSIE_HINT>(expr)) {
     ctx.hasTlsIe.store(true, std::memory_order_relaxed);
     // Initial-Exec relocs can be optimized to Local-Exec if the symbol is
-    // locally defined.  This is not supported on SystemZ.
-    if (execOptimize && isLocalInExecutable && ctx.arg.emachine != EM_S390) {
+    // locally defined.
+    if (execOptimize && isLocalInExecutable) {
       sec->addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
     } else if (expr != R_TLSIE_HINT) {
       sym.setFlags(NEEDS_TLSIE);
@@ -1327,12 +1327,6 @@ void TargetInfo::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
   // Many relocations end up in sec.relocations.
   sec.relocations.reserve(rels.size());
 
-  // On SystemZ, all sections need to be sorted by r_offset, to allow TLS
-  // relaxation to be handled correctly - see SystemZ::getTlsGdRelaxSkip.
-  SmallVector<RelTy, 0> storage;
-  if (ctx.arg.emachine == EM_S390)
-    rels = sortRels(rels, storage);
-
   for (auto it = rels.begin(); it != rels.end(); ++it) {
     auto type = it->getType(false);
     rs.scan<ELFT, RelTy>(it, type, rs.getAddend<ELFT>(*it, type));
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 7ba94a4bef849..7966c050a166f 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -351,27 +351,6 @@ static inline int64_t getAddend(const typename ELFT::Crel &rel) {
   return rel.r_addend;
 }
 
-template <typename RelTy>
-inline Relocs<RelTy> sortRels(Relocs<RelTy> rels,
-                              SmallVector<RelTy, 0> &storage) {
-  auto cmp = [](const RelTy &a, const RelTy &b) {
-    return a.r_offset < b.r_offset;
-  };
-  if (!llvm::is_sorted(rels, cmp)) {
-    storage.assign(rels.begin(), rels.end());
-    llvm::stable_sort(storage, cmp);
-    rels = Relocs<RelTy>(storage);
-  }
-  return rels;
-}
-
-template <bool is64>
-inline Relocs<llvm::object::Elf_Crel_Impl<is64>>
-sortRels(Relocs<llvm::object::Elf_Crel_Impl<is64>> rels,
-         SmallVector<llvm::object::Elf_Crel_Impl<is64>, 0> &storage) {
-  return {};
-}
-
 RelocationBaseSection &getIRelativeSection(Ctx &ctx);
 
 // Returns true if Expr refers a GOT entry. Note that this function returns
diff --git a/lld/test/ELF/systemz-plt.s b/lld/test/ELF/systemz-plt.s
index 717343ce4c4d5..1207f0704db8e 100644
--- a/lld/test/ELF/systemz-plt.s
+++ b/lld/test/ELF/systemz-plt.s
@@ -3,7 +3,7 @@
 
 # RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %t1.s -o %t1.o
 # RUN: ld.lld -shared %t1.o -soname=t1.so -o %t1.so
-# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux --crel %s -o %t.o
 # RUN: ld.lld %t.o %t1.so -z separate-code -o %t
 # RUN: llvm-readelf -S -s -r -x .got.plt %t | FileCheck %s
 # RUN: llvm-objdump -d %t | FileCheck --check-prefixes=DIS %s

>From ae2b546ed8df0dd3c17f8269ac2807e5fcf0ba98 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Mon, 16 Feb 2026 12:02:57 -0800
Subject: [PATCH 2/2] minor

---
 lld/ELF/Arch/SystemZ.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lld/ELF/Arch/SystemZ.cpp b/lld/ELF/Arch/SystemZ.cpp
index 1f34fcc939c3d..43ce4d2f7d3e0 100644
--- a/lld/ELF/Arch/SystemZ.cpp
+++ b/lld/ELF/Arch/SystemZ.cpp
@@ -214,7 +214,6 @@ void SystemZ::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
         rs.maybeReportUndefined(cast<Undefined>(sym), offset))
       continue;
     int64_t addend = rs.getAddend<ELFT>(*it, type);
-
     RelExpr expr;
     // Relocation types that only need a RelExpr set `expr` and break out of
     // the switch to reach rs.process(). Types that need special handling



More information about the llvm-commits mailing list