[lld] [ELF] Add target-specific relocation scanning for x86 (PR #178846)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 30 00:06:45 PST 2026


https://github.com/MaskRay created https://github.com/llvm/llvm-project/pull/178846

Implement scanSection/scanSectionImpl for i386 and x86-64 to

* enable devirtualization of getRelExpr calls
* eliminate abstraction overhead for PLT-to-PCRel optimization and TLS relocations
* optimize for R_X86_64_PC32 and R_X86_64_PLT32: they make up 95% of the
  relocations in `lld/ELF/**/*.o` files.

at the cost of more code.

TLS relocation handling is inlined into scanSectionImpl. Also,

- Remove getTlsGdRelaxSkip
- Replace TLS-optimization-specific expressions:
  - R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_LD_TO_LE, R_RELAX_TLS_IE_TO_LE → R_TPREL
  - R_RELAX_TLS_GD_TO_IE → R_GOT_PC

This follows the pattern established for MIPS and PPC64 in `#163138`.

getRelExpr is simplified to only handle relocations needed for
relocateNonAlloc and relocateEH.

Average "Scan relocations" time (the `dur` field reported by --time-trace, in microseconds) decreases from 110078 to 101585, about 7.7%.

(
```
ruby -rjson -e 't=[]; 50.times { system("numactl -C 0-7 /t/lld1 -flavor gnu --threads=8 @response.txt -o a.out --time-trace"); t << JSON.parse(File.read("a.out.time-trace"))["traceEvents"].find { |e| e["name"] == "Scan relocations" }["dur"] }; puts "Average: #{t.sum / t.size}"'
```
)


From a3687b18ba50c4d32e28aa595919efdcd0506968 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Wed, 28 Jan 2026 00:24:04 -0800
Subject: [PATCH] [ELF] Add target-specific relocation scanning for x86
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement scanSection/scanSectionImpl for i386 and x86-64 to

* enable devirtualization of getRelExpr calls
* eliminate abstraction overhead for PLT-to-PCRel optimization and TLS relocations
* optimize for R_X86_64_PC32 and R_X86_64_PLT32: they make up 95% of the
  relocations in `lld/ELF/**/*.o` files.

at the cost of more code.

TLS relocation handling is inlined into scanSectionImpl. Also,

- Remove getTlsGdRelaxSkip
- Replace TLS-optimization-specific expressions:
  - R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_LD_TO_LE, R_RELAX_TLS_IE_TO_LE → R_TPREL
  - R_RELAX_TLS_GD_TO_IE → R_GOT_PC

This follows the pattern established for MIPS and PPC64 in `#163138`.

getRelExpr is simplified to only handle relocations needed for
relocateNonAlloc and relocateEH.

Average "Scan relocations" time (the `dur` field reported by --time-trace, in microseconds) decreases from 110078 to 101585, about 7.7%.

(
```
ruby -rjson -e 't=[]; 50.times { system("numactl -C 0-7 /t/lld1 -flavor gnu --threads=8 @response.txt -o a.out --time-trace"); t << JSON.parse(File.read("a.out.time-trace"))["traceEvents"].find { |e| e["name"] == "Scan relocations" }["dur"] }; puts "Average: #{t.sum / t.size}"'
```
)
---
 lld/ELF/Arch/X86.cpp    | 243 ++++++++++++++++++++++++++--------------
 lld/ELF/Arch/X86_64.cpp | 190 +++++++++++++++++++++++--------
 lld/ELF/RelocScan.h     |  95 +++++++++++++++-
 lld/ELF/Relocations.cpp |  14 ++-
 lld/ELF/Relocations.h   |   1 +
 5 files changed, 407 insertions(+), 136 deletions(-)

diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp
index 904741fd72b0a..0a90f4954c44d 100644
--- a/lld/ELF/Arch/X86.cpp
+++ b/lld/ELF/Arch/X86.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "OutputSections.h"
+#include "RelocScan.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -22,9 +23,8 @@ namespace {
 class X86 : public TargetInfo {
 public:
   X86(Ctx &);
-  int getTlsGdRelaxSkip(RelType type) const override;
   RelExpr getRelExpr(RelType type, const Symbol &s,
-                     const uint8_t *loc) const override;
+                     const uint8_t *loc) const final;
   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
   void writeGotPltHeader(uint8_t *buf) const override;
   RelType getDynRel(RelType type) const override;
@@ -35,8 +35,9 @@ class X86 : public TargetInfo {
                 uint64_t pltEntryAddr) const override;
   void relocate(uint8_t *loc, const Relocation &rel,
                 uint64_t val) const override;
-
-  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
+  template <class ELFT, class RelTy>
+  void scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels);
+  void scanSection(InputSectionBase &sec) override;
   void relocateAlloc(InputSection &sec, uint8_t *buf) const override;
 
 private:
@@ -69,11 +70,7 @@ X86::X86(Ctx &ctx) : TargetInfo(ctx) {
   defaultImageBase = 0x400000;
 }
 
-int X86::getTlsGdRelaxSkip(RelType type) const {
-  // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
-  return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
-}
-
+// Only needed to support relocations used by relocateNonAlloc.
 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
                         const uint8_t *loc) const {
   switch (type) {
@@ -83,10 +80,6 @@ RelExpr X86::getRelExpr(RelType type, const Symbol &s,
     return R_ABS;
   case R_386_TLS_LDO_32:
     return R_DTPREL;
-  case R_386_TLS_GD:
-    return R_TLSGD_GOTPLT;
-  case R_386_TLS_LDM:
-    return R_TLSLD_GOTPLT;
   case R_386_PLT32:
     return R_PLT_PC;
   case R_386_PC8:
@@ -95,52 +88,6 @@ RelExpr X86::getRelExpr(RelType type, const Symbol &s,
     return R_PC;
   case R_386_GOTPC:
     return R_GOTPLTONLY_PC;
-  case R_386_TLS_IE:
-    return R_GOT;
-  case R_386_GOT32:
-  case R_386_GOT32X:
-    // These relocations are arguably mis-designed because their calculations
-    // depend on the instructions they are applied to. This is bad because we
-    // usually don't care about whether the target section contains valid
-    // machine instructions or not. But this is part of the documented ABI, so
-    // we had to implement as the standard requires.
-    //
-    // x86 does not support PC-relative data access. Therefore, in order to
-    // access GOT contents, a GOT address needs to be known at link-time
-    // (which means non-PIC) or compilers have to emit code to get a GOT
-    // address at runtime (which means code is position-independent but
-    // compilers need to emit extra code for each GOT access.) This decision
-    // is made at compile-time. In the latter case, compilers emit code to
-    // load a GOT address to a register, which is usually %ebx.
-    //
-    // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
-    // foo@GOT(%ebx).
-    //
-    // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
-    // find such relocation, we should report an error. foo@GOT is resolved to
-    // an *absolute* address of foo's GOT entry, because both GOT address and
-    // foo's offset are known. In other words, it's G + A.
-    //
-    // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
-    // foo's GOT entry in the table, because GOT address is not known but foo's
-    // offset in the table is known. It's G + A - GOT.
-    //
-    // It's unfortunate that compilers emit the same relocation for these
-    // different use cases. In order to distinguish them, we have to read a
-    // machine instruction.
-    //
-    // The following code implements it. We assume that Loc[0] is the first byte
-    // of a displacement or an immediate field of a valid machine
-    // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
-    // the byte, we can determine whether the instruction uses the operand as an
-    // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
-    return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
-  case R_386_TLS_GOTDESC:
-    return R_TLSDESC_GOTPLT;
-  case R_386_TLS_DESC_CALL:
-    return R_TLSDESC_CALL;
-  case R_386_TLS_GOTIE:
-    return R_GOTPLT;
   case R_386_GOTOFF:
     return R_GOTPLTREL;
   case R_386_TLS_LE:
@@ -156,18 +103,6 @@ RelExpr X86::getRelExpr(RelType type, const Symbol &s,
   }
 }
 
-RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
-  switch (expr) {
-  default:
-    return expr;
-  case R_RELAX_TLS_GD_TO_IE:
-    return R_RELAX_TLS_GD_TO_IE_GOTPLT;
-  case R_RELAX_TLS_GD_TO_LE:
-    return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
-                                : R_RELAX_TLS_GD_TO_LE;
-  }
-}
-
 void X86::writeGotPltHeader(uint8_t *buf) const {
   write32le(buf, ctx.mainPart->dynamic->getVA());
 }
@@ -411,11 +346,6 @@ void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
     }
     loc[-2] = 0x8b;
     write32le(loc, val);
-  } else {
-    // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
-    assert(rel.type == R_386_TLS_DESC_CALL);
-    loc[0] = 0x66;
-    loc[1] = 0x90;
   }
 }
 
@@ -497,19 +427,30 @@ void X86::relocateAlloc(InputSection &sec, uint8_t *buf) const {
     uint8_t *loc = buf + rel.offset;
     const uint64_t val =
         SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), 32);
-    switch (rel.expr) {
-    case R_RELAX_TLS_GD_TO_IE_GOTPLT:
-      relaxTlsGdToIe(loc, rel, val);
+    switch (rel.type) {
+    case R_386_TLS_GD:
+    case R_386_TLS_GOTDESC:
+    case R_386_TLS_DESC_CALL:
+      if (rel.expr == R_TPREL || rel.expr == R_TPREL_NEG)
+        relaxTlsGdToLe(loc, rel, val);
+      else if (rel.expr == R_GOTPLT)
+        relaxTlsGdToIe(loc, rel, val);
+      else
+        relocate(loc, rel, val);
       continue;
-    case R_RELAX_TLS_GD_TO_LE:
-    case R_RELAX_TLS_GD_TO_LE_NEG:
-      relaxTlsGdToLe(loc, rel, val);
+    case R_386_TLS_LDM:
+    case R_386_TLS_LDO_32:
+      if (rel.expr == R_TPREL)
+        relaxTlsLdToLe(loc, rel, val);
+      else
+        relocate(loc, rel, val);
       continue;
-    case R_RELAX_TLS_LD_TO_LE:
-      relaxTlsLdToLe(loc, rel, val);
-      break;
-    case R_RELAX_TLS_IE_TO_LE:
-      relaxTlsIeToLe(loc, rel, val);
+    case R_386_TLS_IE:
+    case R_386_TLS_GOTIE:
+      if (rel.expr == R_TPREL)
+        relaxTlsIeToLe(loc, rel, val);
+      else
+        relocate(loc, rel, val);
       continue;
     default:
       relocate(loc, rel, val);
@@ -518,6 +459,134 @@ void X86::relocateAlloc(InputSection &sec, uint8_t *buf) const {
   }
 }
 
+template <class ELFT, class RelTy>
+void X86::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
+  RelocScan rs(ctx, &sec);
+  sec.relocations.reserve(rels.size());
+
+  for (auto it = rels.begin(); it != rels.end(); ++it) {
+    const RelTy &rel = *it;
+    uint32_t symIdx = rel.getSymbol(false);
+    Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx);
+    uint64_t offset = rel.r_offset;
+    RelType type = rel.getType(false);
+    if (sym.isUndefined() && symIdx != 0 &&
+        rs.maybeReportUndefined(cast<Undefined>(sym), offset))
+      continue;
+    int64_t addend = rs.getAddend<ELFT>(rel, type);
+    RelExpr expr;
+    switch (type) {
+    case R_386_NONE:
+      continue;
+
+      // Absolute relocations:
+    case R_386_8:
+    case R_386_16:
+    case R_386_32:
+      expr = R_ABS;
+      break;
+
+      // PC-relative relocations:
+    case R_386_PC8:
+    case R_386_PC16:
+    case R_386_PC32:
+      rs.scanPCRel(type, offset, addend, sym);
+      continue;
+
+      // PLT-generating relocation:
+    case R_386_PLT32:
+      rs.scanPlt(type, offset, addend, sym);
+      continue;
+
+      // GOT-related relocations:
+    case R_386_GOTPC:
+      ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+      expr = R_GOTPLTONLY_PC;
+      break;
+    case R_386_GOTOFF:
+      ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+      expr = R_GOTPLTREL;
+      break;
+    case R_386_GOT32:
+    case R_386_GOT32X:
+      // These relocations return R_GOT or R_GOTPLT depending on instruction
+      // encoding. R_GOT is absolute, R_GOTPLT is relative to GOT base.
+      expr =
+          (sec.content().data()[offset - 1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
+      if (expr == R_GOTPLT)
+        ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+      break;
+
+      // TLS relocations:
+    case R_386_TLS_LE:
+      if (rs.checkTlsLe(offset, sym, type))
+        continue;
+      expr = R_TPREL;
+      break;
+    case R_386_TLS_LE_32:
+      if (rs.checkTlsLe(offset, sym, type))
+        continue;
+      expr = R_TPREL_NEG;
+      break;
+    case R_386_TLS_IE:
+      ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+      if (!ctx.arg.shared && !sym.isPreemptible) {
+        sec.addReloc({R_TPREL, type, offset, addend, &sym});
+      } else {
+        sym.setFlags(NEEDS_TLSIE);
+        // In PIC, the absolute GOT address needs a RELATIVE dynamic relocation.
+        if (ctx.arg.isPic)
+          sec.getPartition(ctx).relaDyn->addRelativeReloc(
+              ctx.target->relativeRel, sec, offset, sym, addend, type, R_GOT);
+        else
+          sec.addReloc({R_GOT, type, offset, addend, &sym});
+      }
+      continue;
+    case R_386_TLS_GOTIE:
+      ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+      rs.handleTlsIe(R_GOTPLT, type, offset, addend, sym);
+      continue;
+    case R_386_TLS_GD:
+      ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+      // Use R_TPREL_NEG for negative TP offset.
+      if (rs.handleTlsGd(R_TLSGD_GOTPLT, R_GOTPLT, R_TPREL_NEG, type, offset,
+                         addend, sym))
+        ++it;
+      continue;
+    case R_386_TLS_LDM:
+      ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+      if (rs.handleTlsLd(R_TLSLD_GOTPLT, type, offset, addend, sym))
+        ++it;
+      continue;
+    case R_386_TLS_LDO_32:
+      sec.addReloc(
+          {ctx.arg.shared ? R_DTPREL : R_TPREL, type, offset, addend, &sym});
+      continue;
+    case R_386_TLS_GOTDESC:
+      ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+      rs.handleTlsDesc(R_TLSDESC_GOTPLT, R_GOTPLT, type, offset, addend, sym);
+      continue;
+    case R_386_TLS_DESC_CALL:
+      // For executables, TLSDESC is optimized to IE or LE. Use R_TPREL as the
+      // rewrites for this relocation are identical.
+      if (!ctx.arg.shared)
+        sec.addReloc({R_TPREL, type, offset, addend, &sym});
+      continue;
+
+    default:
+      Err(ctx) << getErrorLoc(ctx, sec.content().data() + offset)
+               << "unknown relocation (" << type.v << ") against symbol "
+               << &sym;
+      continue;
+    }
+    rs.process(expr, type, offset, sym, addend);
+  }
+}
+
+void X86::scanSection(InputSectionBase &sec) {
+  elf::scanSection1<X86, ELF32LE>(*this, sec);
+}
+
 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
 // entries containing endbr32 instructions. A PLT entry will be split into two
 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 9083b5b9ff250..d08e605621143 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "OutputSections.h"
+#include "RelocScan.h"
 #include "Relocations.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
@@ -27,9 +28,8 @@ namespace {
 class X86_64 : public TargetInfo {
 public:
   X86_64(Ctx &);
-  int getTlsGdRelaxSkip(RelType type) const override;
   RelExpr getRelExpr(RelType type, const Symbol &s,
-                     const uint8_t *loc) const override;
+                     const uint8_t *loc) const final;
   RelType getDynRel(RelType type) const override;
   void writeGotPltHeader(uint8_t *buf) const override;
   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
@@ -51,6 +51,9 @@ class X86_64 : public TargetInfo {
                              InputSection *nextIS) const override;
   bool relaxOnce(int pass) const override;
   void applyBranchToBranchOpt() const override;
+  template <class ELFT, class RelTy>
+  void scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels);
+  void scanSection(InputSectionBase &sec) override;
 
 private:
   void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
@@ -98,15 +101,6 @@ X86_64::X86_64(Ctx &ctx) : TargetInfo(ctx) {
   defaultImageBase = 0x200000;
 }
 
-int X86_64::getTlsGdRelaxSkip(RelType type) const {
-  // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
-  return type == R_X86_64_GOTPC32_TLSDESC ||
-                 type == R_X86_64_CODE_4_GOTPC32_TLSDESC ||
-                 type == R_X86_64_TLSDESC_CALL
-             ? 1
-             : 2;
-}
-
 // Opcodes for the different X86_64 jmp instructions.
 enum JmpInsnOpcode : uint32_t {
   J_JMP_32,
@@ -361,6 +355,7 @@ bool X86_64::relaxOnce(int pass) const {
   return changed;
 }
 
+// Only needed to support relocations used by relocateNonAlloc and relocateEH.
 RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
                            const uint8_t *loc) const {
   switch (type) {
@@ -370,21 +365,15 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
   case R_X86_64_32S:
   case R_X86_64_64:
     return R_ABS;
+  case R_X86_64_SIZE32:
+  case R_X86_64_SIZE64:
+    return R_SIZE;
   case R_X86_64_DTPOFF32:
   case R_X86_64_DTPOFF64:
     return R_DTPREL;
   case R_X86_64_TPOFF32:
   case R_X86_64_TPOFF64:
     return R_TPREL;
-  case R_X86_64_TLSDESC_CALL:
-    return R_TLSDESC_CALL;
-  case R_X86_64_TLSLD:
-    return R_TLSLD_PC;
-  case R_X86_64_TLSGD:
-    return R_TLSGD_PC;
-  case R_X86_64_SIZE32:
-  case R_X86_64_SIZE64:
-    return R_SIZE;
   case R_X86_64_PLT32:
     return R_PLT_PC;
   case R_X86_64_PC8:
@@ -392,24 +381,8 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
   case R_X86_64_PC32:
   case R_X86_64_PC64:
     return R_PC;
-  case R_X86_64_GOT32:
-  case R_X86_64_GOT64:
-    return R_GOTPLT;
-  case R_X86_64_GOTPC32_TLSDESC:
-  case R_X86_64_CODE_4_GOTPC32_TLSDESC:
-    return R_TLSDESC_PC;
-  case R_X86_64_GOTPCREL:
-  case R_X86_64_GOTPCRELX:
-  case R_X86_64_REX_GOTPCRELX:
-  case R_X86_64_CODE_4_GOTPCRELX:
-  case R_X86_64_GOTTPOFF:
-  case R_X86_64_CODE_4_GOTTPOFF:
-  case R_X86_64_CODE_6_GOTTPOFF:
-    return R_GOT_PC;
   case R_X86_64_GOTOFF64:
     return R_GOTPLTREL;
-  case R_X86_64_PLTOFF64:
-    return R_PLT_GOTPLT;
   case R_X86_64_GOTPC32:
   case R_X86_64_GOTPC64:
     return R_GOTPLTONLY_PC;
@@ -558,11 +531,6 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
     }
     loc[-2] = 0x8b;
     write32le(loc, val);
-  } else {
-    // Convert call *x@tlsdesc(%rax) to xchg ax, ax.
-    assert(rel.type == R_X86_64_TLSDESC_CALL);
-    loc[0] = 0x66;
-    loc[1] = 0x90;
   }
 }
 
@@ -915,9 +883,9 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_X86_64_CODE_4_GOTPC32_TLSDESC:
   case R_X86_64_TLSDESC_CALL:
   case R_X86_64_TLSGD:
-    if (rel.expr == R_RELAX_TLS_GD_TO_LE) {
+    if (rel.expr == R_TPREL) {
       relaxTlsGdToLe(loc, rel, val);
-    } else if (rel.expr == R_RELAX_TLS_GD_TO_IE) {
+    } else if (rel.expr == R_GOT_PC) {
       relaxTlsGdToIe(loc, rel, val);
     } else {
       checkInt(ctx, loc, val, 32, rel);
@@ -925,7 +893,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     }
     break;
   case R_X86_64_TLSLD:
-    if (rel.expr == R_RELAX_TLS_LD_TO_LE) {
+    if (rel.expr == R_TPREL) {
       relaxTlsLdToLe(loc, rel, val);
     } else {
       checkInt(ctx, loc, val, 32, rel);
@@ -935,7 +903,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_X86_64_GOTTPOFF:
   case R_X86_64_CODE_4_GOTTPOFF:
   case R_X86_64_CODE_6_GOTTPOFF:
-    if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
+    if (rel.expr == R_TPREL) {
       relaxTlsIeToLe(loc, rel, val);
     } else {
       checkInt(ctx, loc, val, 32, rel);
@@ -1396,6 +1364,138 @@ void RetpolineZNow::writePlt(uint8_t *buf, const Symbol &sym,
   write32le(buf + 8, ctx.in.plt->getVA() - pltEntryAddr - 12);
 }
 
+template <class ELFT, class RelTy>
+void X86_64::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
+  RelocScan rs(ctx, &sec);
+  sec.relocations.reserve(rels.size());
+
+  for (auto it = rels.begin(); it != rels.end(); ++it) {
+    const RelTy &rel = *it;
+    uint32_t symIdx = rel.getSymbol(false);
+    Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx);
+    uint64_t offset = rel.r_offset;
+    RelType type = rel.getType(false);
+    if (sym.isUndefined() && symIdx != 0 &&
+        rs.maybeReportUndefined(cast<Undefined>(sym), offset))
+      continue;
+    int64_t addend = rs.getAddend<ELFT>(rel, type);
+    RelExpr expr;
+    switch (type) {
+    case R_X86_64_NONE:
+      continue;
+
+      // Absolute relocations:
+    case R_X86_64_8:
+    case R_X86_64_16:
+    case R_X86_64_32:
+    case R_X86_64_32S:
+    case R_X86_64_64:
+      expr = R_ABS;
+      break;
+
+      // PC-relative relocations:
+    case R_X86_64_PC8:
+    case R_X86_64_PC16:
+    case R_X86_64_PC32:
+    case R_X86_64_PC64:
+      rs.scanPCRel(type, offset, addend, sym);
+      continue;
+
+      // GOT-generating relocations:
+    case R_X86_64_GOTPC32:
+    case R_X86_64_GOTPC64:
+      ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+      expr = R_GOTPLTONLY_PC;
+      break;
+    case R_X86_64_GOTOFF64:
+      ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+      expr = R_GOTPLTREL;
+      break;
+    case R_X86_64_GOT32:
+    case R_X86_64_GOT64:
+      ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+      expr = R_GOTPLT;
+      break;
+    case R_X86_64_PLTOFF64:
+      ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+      expr = R_PLT_GOTPLT;
+      break;
+    case R_X86_64_GOTPCREL:
+    case R_X86_64_GOTPCRELX:
+    case R_X86_64_REX_GOTPCRELX:
+    case R_X86_64_CODE_4_GOTPCRELX:
+      expr = R_GOT_PC;
+      break;
+
+      // PLT-generating relocation:
+    case R_X86_64_PLT32:
+      rs.scanPlt(type, offset, addend, sym);
+      continue;
+
+      // TLS relocations:
+    case R_X86_64_TPOFF32:
+    case R_X86_64_TPOFF64:
+      if (rs.checkTlsLe(offset, sym, type))
+        continue;
+      expr = R_TPREL;
+      break;
+    case R_X86_64_GOTTPOFF:
+    case R_X86_64_CODE_4_GOTTPOFF:
+    case R_X86_64_CODE_6_GOTTPOFF:
+      rs.handleTlsIe(R_GOT_PC, type, offset, addend, sym);
+      continue;
+    case R_X86_64_TLSGD:
+      if (rs.handleTlsGd(R_TLSGD_PC, R_GOT_PC, R_TPREL, type, offset, addend,
+                         sym))
+        ++it;
+      continue;
+    case R_X86_64_TLSLD:
+      if (rs.handleTlsLd(R_TLSLD_PC, type, offset, addend, sym))
+        ++it;
+      continue;
+    case R_X86_64_DTPOFF32:
+    case R_X86_64_DTPOFF64:
+      sec.addReloc(
+          {ctx.arg.shared ? R_DTPREL : R_TPREL, type, offset, addend, &sym});
+      continue;
+    case R_X86_64_TLSDESC_CALL:
+      // For executables, TLSDESC is optimized to IE or LE. Use R_TPREL as the
+      // rewrites for this relocation are identical.
+      if (!ctx.arg.shared)
+        sec.addReloc({R_TPREL, type, offset, addend, &sym});
+      continue;
+    case R_X86_64_GOTPC32_TLSDESC:
+    case R_X86_64_CODE_4_GOTPC32_TLSDESC:
+      rs.handleTlsDesc(R_TLSDESC_PC, R_GOT_PC, type, offset, addend, sym);
+      continue;
+
+      // Misc relocations:
+    case R_X86_64_SIZE32:
+    case R_X86_64_SIZE64:
+      expr = R_SIZE;
+      break;
+
+    default:
+      Err(ctx) << getErrorLoc(ctx, sec.content().data() + offset)
+               << "unknown relocation (" << type.v << ") against symbol "
+               << &sym;
+      continue;
+    }
+    rs.process(expr, type, offset, sym, addend);
+  }
+
+  if (ctx.arg.branchToBranch)
+    llvm::stable_sort(sec.relocs(),
+                      [](auto &l, auto &r) { return l.offset < r.offset; });
+}
+
+void X86_64::scanSection(InputSectionBase &sec) {
+  if (ctx.arg.is64)
+    elf::scanSection1<X86_64, ELF64LE>(*this, sec);
+  else // ilp32
+    elf::scanSection1<X86_64, ELF32LE>(*this, sec);
+}
+
 void elf::setX86_64TargetInfo(Ctx &ctx) {
   if (ctx.arg.zRetpolineplt) {
     if (ctx.arg.zNow)
diff --git a/lld/ELF/RelocScan.h b/lld/ELF/RelocScan.h
index 01750431b7468..79f04e0448a59 100644
--- a/lld/ELF/RelocScan.h
+++ b/lld/ELF/RelocScan.h
@@ -66,8 +66,100 @@ class RelocScan {
                                 uint64_t relOff) const;
   void process(RelExpr expr, RelType type, uint64_t offset, Symbol &sym,
                int64_t addend) const;
+  // Process relocation after needsGot/needsPlt flags are already handled.
+  void processAux(RelExpr expr, RelType type, uint64_t offset, Symbol &sym,
+                  int64_t addend) const;
   unsigned handleTlsRelocation(RelExpr expr, RelType type, uint64_t offset,
                                Symbol &sym, int64_t addend);
+
+  // Handle R_PC relocations. These are the most common relocation type, so we
+  // inline the isStaticLinkTimeConstant check.
+  void scanPCRel(RelType type, uint64_t offset, int64_t addend, Symbol &sym) {
+    if (LLVM_UNLIKELY(sym.isGnuIFunc()))
+      sym.setFlags(HAS_DIRECT_RELOC);
+    if (!sym.isPreemptible && (!ctx.arg.isPic || !isAbsoluteOrTls(sym))) {
+      sec->addReloc({R_PC, type, offset, addend, &sym});
+      return;
+    }
+    processAux(R_PC, type, offset, sym, addend);
+  }
+
+  // Handle R_PLT_PC relocations. These are very common (calls/branches), so we
+  // inline the isStaticLinkTimeConstant check. Non-preemptible symbols are
+  // optimized to R_PC (direct call).
+  void scanPlt(RelType type, uint64_t offset, int64_t addend, Symbol &sym) {
+    if (LLVM_UNLIKELY(sym.isGnuIFunc())) {
+      process(R_PLT_PC, type, offset, sym, addend);
+      return;
+    }
+    if (sym.isPreemptible) {
+      sym.setFlags(NEEDS_PLT);
+      sec->addReloc({R_PLT_PC, type, offset, addend, &sym});
+    } else if (!ctx.arg.isPic || !isAbsoluteOrTls(sym)) {
+      sec->addReloc({R_PC, type, offset, addend, &sym});
+    } else {
+      processAux(R_PC, type, offset, sym, addend);
+    }
+  }
+
+  // Handle TLS Initial-Exec relocation.
+  void handleTlsIe(RelExpr ieExpr, RelType type, uint64_t offset,
+                   int64_t addend, Symbol &sym) {
+    ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+    if (!ctx.arg.shared && !sym.isPreemptible) {
+      sec->addReloc({R_TPREL, type, offset, addend, &sym});
+    } else {
+      sym.setFlags(NEEDS_TLSIE);
+      sec->addReloc({ieExpr, type, offset, addend, &sym});
+    }
+  }
+
+  // Handle TLS Local-Dynamic relocation. Returns true if the __tls_get_addr
+  // call should be skipped (i.e., caller should ++it).
+  bool handleTlsLd(RelExpr sharedExpr, RelType type, uint64_t offset,
+                   int64_t addend, Symbol &sym) {
+    if (ctx.arg.shared) {
+      ctx.needsTlsLd.store(true, std::memory_order_relaxed);
+      sec->addReloc({sharedExpr, type, offset, addend, &sym});
+      return false;
+    }
+    sec->addReloc({R_TPREL, type, offset, addend, &sym});
+    return true;
+  }
+
+  // Handle TLS General-Dynamic relocation. Returns true if the __tls_get_addr
+  // call should be skipped (i.e., caller should ++it).
+  bool handleTlsGd(RelExpr sharedExpr, RelExpr ieExpr, RelExpr leExpr,
+                   RelType type, uint64_t offset, int64_t addend, Symbol &sym) {
+    if (ctx.arg.shared) {
+      sym.setFlags(NEEDS_TLSGD);
+      sec->addReloc({sharedExpr, type, offset, addend, &sym});
+      return false;
+    }
+    if (sym.isPreemptible) {
+      ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+      sym.setFlags(NEEDS_TLSIE);
+      sec->addReloc({ieExpr, type, offset, addend, &sym});
+    } else {
+      sec->addReloc({leExpr, type, offset, addend, &sym});
+    }
+    return true;
+  }
+
+  // Handle TLSDESC relocation.
+  void handleTlsDesc(RelExpr sharedExpr, RelExpr ieExpr, RelType type,
+                     uint64_t offset, int64_t addend, Symbol &sym) {
+    if (ctx.arg.shared) {
+      sym.setFlags(NEEDS_TLSDESC);
+      sec->addReloc({sharedExpr, type, offset, addend, &sym});
+    } else if (sym.isPreemptible) {
+      ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+      sym.setFlags(NEEDS_TLSIE);
+      sec->addReloc({ieExpr, type, offset, addend, &sym});
+    } else {
+      sec->addReloc({R_TPREL, type, offset, addend, &sym});
+    }
+  }
 };
 
 template <class ELFT, class RelTy>
@@ -99,8 +191,7 @@ void RelocScan::scan(typename Relocs<RelTy>::const_iterator &it, RelType type,
 
   // Ensure GOT or GOTPLT is created for relocations that reference their base
   // addresses without directly creating entries.
-  if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_PLT_GOTPLT,
-            R_TLSDESC_GOTPLT, R_TLSGD_GOTPLT>(expr)) {
+  if (oneof<R_GOTPLTREL, R_GOTPLT, R_TLSGD_GOTPLT>(expr)) {
     ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
   } else if (oneof<R_GOTONLY_PC, R_GOTREL, RE_PPC32_PLTREL>(expr)) {
     ctx.in.got->hasGotOffRel.store(true, std::memory_order_relaxed);
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 796c346b2655d..bc339756da481 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -122,7 +122,7 @@ bool elf::isAbsolute(const Symbol &sym) {
   return false;
 }
 
-static bool isAbsoluteOrTls(const Symbol &sym) {
+bool elf::isAbsoluteOrTls(const Symbol &sym) {
   return isAbsolute(sym) || sym.isTls();
 }
 
@@ -973,6 +973,16 @@ void RelocScan::process(RelExpr expr, RelType type, uint64_t offset,
     sym.setFlags(HAS_DIRECT_RELOC);
   }
 
+  processAux(expr, type, offset, sym, addend);
+}
+
+// Process relocation after needsGot/needsPlt flags are already handled.
+// This is the bottom half of process(), handling isStaticLinkTimeConstant
+// check, dynamic relocations, copy relocations, and error reporting.
+void RelocScan::processAux(RelExpr expr, RelType type, uint64_t offset,
+                           Symbol &sym, int64_t addend) const {
+  const bool isIfunc = sym.isGnuIFunc();
+
   // If the relocation is known to be a link-time constant, we know no dynamic
   // relocation will be created, pass the control to relocateAlloc() or
   // relocateNonAlloc() to resolve it.
@@ -1310,7 +1320,7 @@ unsigned RelocScan::handleTlsRelocation(RelExpr expr, RelType type,
       sec->addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
     } else if (expr != R_TLSIE_HINT) {
       sym.setFlags(NEEDS_TLSIE);
-      // R_GOT needs a relative relocation for PIC on i386 and Hexagon.
+      // R_GOT needs a relative relocation for PIC on Hexagon.
       if (expr == R_GOT && ctx.arg.isPic &&
           !ctx.target->usesOnlyLowPageBits(type))
         addRelativeReloc<true>(ctx, *sec, offset, sym, addend, expr, type);
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 680eb66e3356a..f9964a98a58ce 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -175,6 +175,7 @@ void hexagonTLSSymbolUpdate(Ctx &ctx);
 bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections);
 
 bool isAbsolute(const Symbol &sym);
+bool isAbsoluteOrTls(const Symbol &sym);
 
 class ThunkSection;
 class Thunk;



More information about the llvm-commits mailing list