[lld] [ELF] Add target-specific relocation scanning for x86 (PR #178846)
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 30 00:06:45 PST 2026
https://github.com/MaskRay created https://github.com/llvm/llvm-project/pull/178846
Implement scanSection/scanSectionImpl for i386 and x86-64 to
* enable devirtualization of getRelExpr calls
* eliminate abstraction overhead for the PLT-to-PCRel optimization and TLS relocations
* optimize for R_X86_64_PC32 and R_X86_64_PLT32: they constitute 95% of the
relocations in `lld/ELF/**/*.o` files.
at the cost of more code.
TLS relocation handling is inlined into scanSectionImpl. Also,
- Remove getTlsGdRelaxSkip
- Replace TLS-optimization-specific expressions:
- R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_LD_TO_LE, R_RELAX_TLS_IE_TO_LE → R_TPREL
- R_RELAX_TLS_GD_TO_IE → R_GOT_PC
This follows the pattern established for MIPS and PPC64 in `#163138`.
getRelExpr is simplified to only handle relocations needed for
relocateNonAlloc and relocateEH.
Average "Scan relocations" time (measured by --time-trace) decreases from 110078 to 101585 microseconds.
(
```
ruby -rjson -e 't=[]; 50.times { system("numactl -C 0-7 /t/lld1 -flavor gnu --threads=8 @response.txt -o a.out --time-trace"); t << JSON.parse(File.read("a.out.time-trace"))["traceEvents"].find { |e| e["name"] == "Scan relocations" }["dur"] }; puts "Average: #{t.sum / t.size}"'
```
)
>From a3687b18ba50c4d32e28aa595919efdcd0506968 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Wed, 28 Jan 2026 00:24:04 -0800
Subject: [PATCH] [ELF] Add target-specific relocation scanning for x86
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Implement scanSection/scanSectionImpl for i386 and x86-64 to
* enable devirtualization of getRelExpr calls
* eliminate abstraction overhead for the PLT-to-PCRel optimization and TLS relocations
* optimize for R_X86_64_PC32 and R_X86_64_PLT32: they constitute 95% of the
relocations in `lld/ELF/**/*.o` files.
at the cost of more code.
TLS relocation handling is inlined into scanSectionImpl. Also,
- Remove getTlsGdRelaxSkip
- Replace TLS-optimization-specific expressions:
- R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_LD_TO_LE, R_RELAX_TLS_IE_TO_LE → R_TPREL
- R_RELAX_TLS_GD_TO_IE → R_GOT_PC
This follows the pattern established for MIPS and PPC64 in `#163138`.
getRelExpr is simplified to only handle relocations needed for
relocateNonAlloc and relocateEH.
Average "Scan relocations" time (measured by --time-trace) decreases from 110078 to 101585 microseconds.
(
```
ruby -rjson -e 't=[]; 50.times { system("numactl -C 0-7 /t/lld1 -flavor gnu --threads=8 @response.txt -o a.out --time-trace"); t << JSON.parse(File.read("a.out.time-trace"))["traceEvents"].find { |e| e["name"] == "Scan relocations" }["dur"] }; puts "Average: #{t.sum / t.size}"'
```
)
---
lld/ELF/Arch/X86.cpp | 243 ++++++++++++++++++++++++++--------------
lld/ELF/Arch/X86_64.cpp | 190 +++++++++++++++++++++++--------
lld/ELF/RelocScan.h | 95 +++++++++++++++-
lld/ELF/Relocations.cpp | 14 ++-
lld/ELF/Relocations.h | 1 +
5 files changed, 407 insertions(+), 136 deletions(-)
diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp
index 904741fd72b0a..0a90f4954c44d 100644
--- a/lld/ELF/Arch/X86.cpp
+++ b/lld/ELF/Arch/X86.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "OutputSections.h"
+#include "RelocScan.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
@@ -22,9 +23,8 @@ namespace {
class X86 : public TargetInfo {
public:
X86(Ctx &);
- int getTlsGdRelaxSkip(RelType type) const override;
RelExpr getRelExpr(RelType type, const Symbol &s,
- const uint8_t *loc) const override;
+ const uint8_t *loc) const final;
int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
void writeGotPltHeader(uint8_t *buf) const override;
RelType getDynRel(RelType type) const override;
@@ -35,8 +35,9 @@ class X86 : public TargetInfo {
uint64_t pltEntryAddr) const override;
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
-
- RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
+ template <class ELFT, class RelTy>
+ void scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels);
+ void scanSection(InputSectionBase &sec) override;
void relocateAlloc(InputSection &sec, uint8_t *buf) const override;
private:
@@ -69,11 +70,7 @@ X86::X86(Ctx &ctx) : TargetInfo(ctx) {
defaultImageBase = 0x400000;
}
-int X86::getTlsGdRelaxSkip(RelType type) const {
- // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
- return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
-}
-
+// Only needed to support relocations used by relocateNonAlloc.
RelExpr X86::getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const {
switch (type) {
@@ -83,10 +80,6 @@ RelExpr X86::getRelExpr(RelType type, const Symbol &s,
return R_ABS;
case R_386_TLS_LDO_32:
return R_DTPREL;
- case R_386_TLS_GD:
- return R_TLSGD_GOTPLT;
- case R_386_TLS_LDM:
- return R_TLSLD_GOTPLT;
case R_386_PLT32:
return R_PLT_PC;
case R_386_PC8:
@@ -95,52 +88,6 @@ RelExpr X86::getRelExpr(RelType type, const Symbol &s,
return R_PC;
case R_386_GOTPC:
return R_GOTPLTONLY_PC;
- case R_386_TLS_IE:
- return R_GOT;
- case R_386_GOT32:
- case R_386_GOT32X:
- // These relocations are arguably mis-designed because their calculations
- // depend on the instructions they are applied to. This is bad because we
- // usually don't care about whether the target section contains valid
- // machine instructions or not. But this is part of the documented ABI, so
- // we had to implement as the standard requires.
- //
- // x86 does not support PC-relative data access. Therefore, in order to
- // access GOT contents, a GOT address needs to be known at link-time
- // (which means non-PIC) or compilers have to emit code to get a GOT
- // address at runtime (which means code is position-independent but
- // compilers need to emit extra code for each GOT access.) This decision
- // is made at compile-time. In the latter case, compilers emit code to
- // load a GOT address to a register, which is usually %ebx.
- //
- // So, there are two ways to refer to symbol foo's GOT entry: foo at GOT or
- // foo at GOT(%ebx).
- //
- // foo at GOT is not usable in PIC. If we are creating a PIC output and if we
- // find such relocation, we should report an error. foo at GOT is resolved to
- // an *absolute* address of foo's GOT entry, because both GOT address and
- // foo's offset are known. In other words, it's G + A.
- //
- // foo at GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
- // foo's GOT entry in the table, because GOT address is not known but foo's
- // offset in the table is known. It's G + A - GOT.
- //
- // It's unfortunate that compilers emit the same relocation for these
- // different use cases. In order to distinguish them, we have to read a
- // machine instruction.
- //
- // The following code implements it. We assume that Loc[0] is the first byte
- // of a displacement or an immediate field of a valid machine
- // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
- // the byte, we can determine whether the instruction uses the operand as an
- // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
- return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
- case R_386_TLS_GOTDESC:
- return R_TLSDESC_GOTPLT;
- case R_386_TLS_DESC_CALL:
- return R_TLSDESC_CALL;
- case R_386_TLS_GOTIE:
- return R_GOTPLT;
case R_386_GOTOFF:
return R_GOTPLTREL;
case R_386_TLS_LE:
@@ -156,18 +103,6 @@ RelExpr X86::getRelExpr(RelType type, const Symbol &s,
}
}
-RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
- switch (expr) {
- default:
- return expr;
- case R_RELAX_TLS_GD_TO_IE:
- return R_RELAX_TLS_GD_TO_IE_GOTPLT;
- case R_RELAX_TLS_GD_TO_LE:
- return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
- : R_RELAX_TLS_GD_TO_LE;
- }
-}
-
void X86::writeGotPltHeader(uint8_t *buf) const {
write32le(buf, ctx.mainPart->dynamic->getVA());
}
@@ -411,11 +346,6 @@ void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
}
loc[-2] = 0x8b;
write32le(loc, val);
- } else {
- // Convert call *x at tlsdesc(%eax) to xchg ax, ax.
- assert(rel.type == R_386_TLS_DESC_CALL);
- loc[0] = 0x66;
- loc[1] = 0x90;
}
}
@@ -497,19 +427,30 @@ void X86::relocateAlloc(InputSection &sec, uint8_t *buf) const {
uint8_t *loc = buf + rel.offset;
const uint64_t val =
SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), 32);
- switch (rel.expr) {
- case R_RELAX_TLS_GD_TO_IE_GOTPLT:
- relaxTlsGdToIe(loc, rel, val);
+ switch (rel.type) {
+ case R_386_TLS_GD:
+ case R_386_TLS_GOTDESC:
+ case R_386_TLS_DESC_CALL:
+ if (rel.expr == R_TPREL || rel.expr == R_TPREL_NEG)
+ relaxTlsGdToLe(loc, rel, val);
+ else if (rel.expr == R_GOTPLT)
+ relaxTlsGdToIe(loc, rel, val);
+ else
+ relocate(loc, rel, val);
continue;
- case R_RELAX_TLS_GD_TO_LE:
- case R_RELAX_TLS_GD_TO_LE_NEG:
- relaxTlsGdToLe(loc, rel, val);
+ case R_386_TLS_LDM:
+ case R_386_TLS_LDO_32:
+ if (rel.expr == R_TPREL)
+ relaxTlsLdToLe(loc, rel, val);
+ else
+ relocate(loc, rel, val);
continue;
- case R_RELAX_TLS_LD_TO_LE:
- relaxTlsLdToLe(loc, rel, val);
- break;
- case R_RELAX_TLS_IE_TO_LE:
- relaxTlsIeToLe(loc, rel, val);
+ case R_386_TLS_IE:
+ case R_386_TLS_GOTIE:
+ if (rel.expr == R_TPREL)
+ relaxTlsIeToLe(loc, rel, val);
+ else
+ relocate(loc, rel, val);
continue;
default:
relocate(loc, rel, val);
@@ -518,6 +459,134 @@ void X86::relocateAlloc(InputSection &sec, uint8_t *buf) const {
}
}
+template <class ELFT, class RelTy>
+void X86::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
+ RelocScan rs(ctx, &sec);
+ sec.relocations.reserve(rels.size());
+
+ for (auto it = rels.begin(); it != rels.end(); ++it) {
+ const RelTy &rel = *it;
+ uint32_t symIdx = rel.getSymbol(false);
+ Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx);
+ uint64_t offset = rel.r_offset;
+ RelType type = rel.getType(false);
+ if (sym.isUndefined() && symIdx != 0 &&
+ rs.maybeReportUndefined(cast<Undefined>(sym), offset))
+ continue;
+ int64_t addend = rs.getAddend<ELFT>(rel, type);
+ RelExpr expr;
+ switch (type) {
+ case R_386_NONE:
+ continue;
+
+ // Absolute relocations:
+ case R_386_8:
+ case R_386_16:
+ case R_386_32:
+ expr = R_ABS;
+ break;
+
+ // PC-relative relocations:
+ case R_386_PC8:
+ case R_386_PC16:
+ case R_386_PC32:
+ rs.scanPCRel(type, offset, addend, sym);
+ continue;
+
+ // PLT-generating relocation:
+ case R_386_PLT32:
+ rs.scanPlt(type, offset, addend, sym);
+ continue;
+
+ // GOT-related relocations:
+ case R_386_GOTPC:
+ ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+ expr = R_GOTPLTONLY_PC;
+ break;
+ case R_386_GOTOFF:
+ ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+ expr = R_GOTPLTREL;
+ break;
+ case R_386_GOT32:
+ case R_386_GOT32X:
+ // These relocations return R_GOT or R_GOTPLT depending on instruction
+ // encoding. R_GOT is absolute, R_GOTPLT is relative to GOT base.
+ expr =
+ (sec.content().data()[offset - 1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
+ if (expr == R_GOTPLT)
+ ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+ break;
+
+ // TLS relocations:
+ case R_386_TLS_LE:
+ if (rs.checkTlsLe(offset, sym, type))
+ continue;
+ expr = R_TPREL;
+ break;
+ case R_386_TLS_LE_32:
+ if (rs.checkTlsLe(offset, sym, type))
+ continue;
+ expr = R_TPREL_NEG;
+ break;
+ case R_386_TLS_IE:
+ ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+ if (!ctx.arg.shared && !sym.isPreemptible) {
+ sec.addReloc({R_TPREL, type, offset, addend, &sym});
+ } else {
+ sym.setFlags(NEEDS_TLSIE);
+ // In PIC, the absolute GOT address needs a RELATIVE dynamic relocation.
+ if (ctx.arg.isPic)
+ sec.getPartition(ctx).relaDyn->addRelativeReloc(
+ ctx.target->relativeRel, sec, offset, sym, addend, type, R_GOT);
+ else
+ sec.addReloc({R_GOT, type, offset, addend, &sym});
+ }
+ continue;
+ case R_386_TLS_GOTIE:
+ ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+ rs.handleTlsIe(R_GOTPLT, type, offset, addend, sym);
+ continue;
+ case R_386_TLS_GD:
+ ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+ // Use R_TPREL_NEG for negative TP offset.
+ if (rs.handleTlsGd(R_TLSGD_GOTPLT, R_GOTPLT, R_TPREL_NEG, type, offset,
+ addend, sym))
+ ++it;
+ continue;
+ case R_386_TLS_LDM:
+ ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+ if (rs.handleTlsLd(R_TLSLD_GOTPLT, type, offset, addend, sym))
+ ++it;
+ continue;
+ case R_386_TLS_LDO_32:
+ sec.addReloc(
+ {ctx.arg.shared ? R_DTPREL : R_TPREL, type, offset, addend, &sym});
+ continue;
+ case R_386_TLS_GOTDESC:
+ ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+ rs.handleTlsDesc(R_TLSDESC_GOTPLT, R_GOTPLT, type, offset, addend, sym);
+ continue;
+ case R_386_TLS_DESC_CALL:
+ // For executables, TLSDESC is optimized to IE or LE. Use R_TPREL as the
+ // rewrites for this relocation are identical.
+ if (!ctx.arg.shared)
+ sec.addReloc({R_TPREL, type, offset, addend, &sym});
+ continue;
+
+ default:
+ Err(ctx) << getErrorLoc(ctx, sec.content().data() + offset)
+ << "unknown relocation (" << type.v << ") against symbol "
+ << &sym;
+ continue;
+ }
+ rs.process(expr, type, offset, sym, addend);
+ }
+}
+
+void X86::scanSection(InputSectionBase &sec) {
+ elf::scanSection1<X86, ELF32LE>(*this, sec);
+}
+
// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
// entries containing endbr32 instructions. A PLT entry will be split into two
// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 9083b5b9ff250..d08e605621143 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "OutputSections.h"
+#include "RelocScan.h"
#include "Relocations.h"
#include "Symbols.h"
#include "SyntheticSections.h"
@@ -27,9 +28,8 @@ namespace {
class X86_64 : public TargetInfo {
public:
X86_64(Ctx &);
- int getTlsGdRelaxSkip(RelType type) const override;
RelExpr getRelExpr(RelType type, const Symbol &s,
- const uint8_t *loc) const override;
+ const uint8_t *loc) const final;
RelType getDynRel(RelType type) const override;
void writeGotPltHeader(uint8_t *buf) const override;
void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
@@ -51,6 +51,9 @@ class X86_64 : public TargetInfo {
InputSection *nextIS) const override;
bool relaxOnce(int pass) const override;
void applyBranchToBranchOpt() const override;
+ template <class ELFT, class RelTy>
+ void scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels);
+ void scanSection(InputSectionBase &sec) override;
private:
void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
@@ -98,15 +101,6 @@ X86_64::X86_64(Ctx &ctx) : TargetInfo(ctx) {
defaultImageBase = 0x200000;
}
-int X86_64::getTlsGdRelaxSkip(RelType type) const {
- // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
- return type == R_X86_64_GOTPC32_TLSDESC ||
- type == R_X86_64_CODE_4_GOTPC32_TLSDESC ||
- type == R_X86_64_TLSDESC_CALL
- ? 1
- : 2;
-}
-
// Opcodes for the different X86_64 jmp instructions.
enum JmpInsnOpcode : uint32_t {
J_JMP_32,
@@ -361,6 +355,7 @@ bool X86_64::relaxOnce(int pass) const {
return changed;
}
+// Only needed to support relocations used by relocateNonAlloc and relocateEH.
RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const {
switch (type) {
@@ -370,21 +365,15 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
case R_X86_64_32S:
case R_X86_64_64:
return R_ABS;
+ case R_X86_64_SIZE32:
+ case R_X86_64_SIZE64:
+ return R_SIZE;
case R_X86_64_DTPOFF32:
case R_X86_64_DTPOFF64:
return R_DTPREL;
case R_X86_64_TPOFF32:
case R_X86_64_TPOFF64:
return R_TPREL;
- case R_X86_64_TLSDESC_CALL:
- return R_TLSDESC_CALL;
- case R_X86_64_TLSLD:
- return R_TLSLD_PC;
- case R_X86_64_TLSGD:
- return R_TLSGD_PC;
- case R_X86_64_SIZE32:
- case R_X86_64_SIZE64:
- return R_SIZE;
case R_X86_64_PLT32:
return R_PLT_PC;
case R_X86_64_PC8:
@@ -392,24 +381,8 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
case R_X86_64_PC32:
case R_X86_64_PC64:
return R_PC;
- case R_X86_64_GOT32:
- case R_X86_64_GOT64:
- return R_GOTPLT;
- case R_X86_64_GOTPC32_TLSDESC:
- case R_X86_64_CODE_4_GOTPC32_TLSDESC:
- return R_TLSDESC_PC;
- case R_X86_64_GOTPCREL:
- case R_X86_64_GOTPCRELX:
- case R_X86_64_REX_GOTPCRELX:
- case R_X86_64_CODE_4_GOTPCRELX:
- case R_X86_64_GOTTPOFF:
- case R_X86_64_CODE_4_GOTTPOFF:
- case R_X86_64_CODE_6_GOTTPOFF:
- return R_GOT_PC;
case R_X86_64_GOTOFF64:
return R_GOTPLTREL;
- case R_X86_64_PLTOFF64:
- return R_PLT_GOTPLT;
case R_X86_64_GOTPC32:
case R_X86_64_GOTPC64:
return R_GOTPLTONLY_PC;
@@ -558,11 +531,6 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
}
loc[-2] = 0x8b;
write32le(loc, val);
- } else {
- // Convert call *x at tlsdesc(%rax) to xchg ax, ax.
- assert(rel.type == R_X86_64_TLSDESC_CALL);
- loc[0] = 0x66;
- loc[1] = 0x90;
}
}
@@ -915,9 +883,9 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_X86_64_CODE_4_GOTPC32_TLSDESC:
case R_X86_64_TLSDESC_CALL:
case R_X86_64_TLSGD:
- if (rel.expr == R_RELAX_TLS_GD_TO_LE) {
+ if (rel.expr == R_TPREL) {
relaxTlsGdToLe(loc, rel, val);
- } else if (rel.expr == R_RELAX_TLS_GD_TO_IE) {
+ } else if (rel.expr == R_GOT_PC) {
relaxTlsGdToIe(loc, rel, val);
} else {
checkInt(ctx, loc, val, 32, rel);
@@ -925,7 +893,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
}
break;
case R_X86_64_TLSLD:
- if (rel.expr == R_RELAX_TLS_LD_TO_LE) {
+ if (rel.expr == R_TPREL) {
relaxTlsLdToLe(loc, rel, val);
} else {
checkInt(ctx, loc, val, 32, rel);
@@ -935,7 +903,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_X86_64_GOTTPOFF:
case R_X86_64_CODE_4_GOTTPOFF:
case R_X86_64_CODE_6_GOTTPOFF:
- if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
+ if (rel.expr == R_TPREL) {
relaxTlsIeToLe(loc, rel, val);
} else {
checkInt(ctx, loc, val, 32, rel);
@@ -1396,6 +1364,138 @@ void RetpolineZNow::writePlt(uint8_t *buf, const Symbol &sym,
write32le(buf + 8, ctx.in.plt->getVA() - pltEntryAddr - 12);
}
+template <class ELFT, class RelTy>
+void X86_64::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
+ RelocScan rs(ctx, &sec);
+ sec.relocations.reserve(rels.size());
+
+ for (auto it = rels.begin(); it != rels.end(); ++it) {
+ const RelTy &rel = *it;
+ uint32_t symIdx = rel.getSymbol(false);
+ Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIdx);
+ uint64_t offset = rel.r_offset;
+ RelType type = rel.getType(false);
+ if (sym.isUndefined() && symIdx != 0 &&
+ rs.maybeReportUndefined(cast<Undefined>(sym), offset))
+ continue;
+ int64_t addend = rs.getAddend<ELFT>(rel, type);
+ RelExpr expr;
+ switch (type) {
+ case R_X86_64_NONE:
+ continue;
+
+ // Absolute relocations:
+ case R_X86_64_8:
+ case R_X86_64_16:
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ case R_X86_64_64:
+ expr = R_ABS;
+ break;
+
+ // PC-relative relocations:
+ case R_X86_64_PC8:
+ case R_X86_64_PC16:
+ case R_X86_64_PC32:
+ case R_X86_64_PC64:
+ rs.scanPCRel(type, offset, addend, sym);
+ continue;
+
+ // GOT-generating relocations:
+ case R_X86_64_GOTPC32:
+ case R_X86_64_GOTPC64:
+ ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+ expr = R_GOTPLTONLY_PC;
+ break;
+ case R_X86_64_GOTOFF64:
+ ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+ expr = R_GOTPLTREL;
+ break;
+ case R_X86_64_GOT32:
+ case R_X86_64_GOT64:
+ ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+ expr = R_GOTPLT;
+ break;
+ case R_X86_64_PLTOFF64:
+ ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
+ expr = R_PLT_GOTPLT;
+ break;
+ case R_X86_64_GOTPCREL:
+ case R_X86_64_GOTPCRELX:
+ case R_X86_64_REX_GOTPCRELX:
+ case R_X86_64_CODE_4_GOTPCRELX:
+ expr = R_GOT_PC;
+ break;
+
+ // PLT-generating relocation:
+ case R_X86_64_PLT32:
+ rs.scanPlt(type, offset, addend, sym);
+ continue;
+
+ // TLS relocations:
+ case R_X86_64_TPOFF32:
+ case R_X86_64_TPOFF64:
+ if (rs.checkTlsLe(offset, sym, type))
+ continue;
+ expr = R_TPREL;
+ break;
+ case R_X86_64_GOTTPOFF:
+ case R_X86_64_CODE_4_GOTTPOFF:
+ case R_X86_64_CODE_6_GOTTPOFF:
+ rs.handleTlsIe(R_GOT_PC, type, offset, addend, sym);
+ continue;
+ case R_X86_64_TLSGD:
+ if (rs.handleTlsGd(R_TLSGD_PC, R_GOT_PC, R_TPREL, type, offset, addend,
+ sym))
+ ++it;
+ continue;
+ case R_X86_64_TLSLD:
+ if (rs.handleTlsLd(R_TLSLD_PC, type, offset, addend, sym))
+ ++it;
+ continue;
+ case R_X86_64_DTPOFF32:
+ case R_X86_64_DTPOFF64:
+ sec.addReloc(
+ {ctx.arg.shared ? R_DTPREL : R_TPREL, type, offset, addend, &sym});
+ continue;
+ case R_X86_64_TLSDESC_CALL:
+ // For executables, TLSDESC is optimized to IE or LE. Use R_TPREL as the
+ // rewrites for this relocation are identical.
+ if (!ctx.arg.shared)
+ sec.addReloc({R_TPREL, type, offset, addend, &sym});
+ continue;
+ case R_X86_64_GOTPC32_TLSDESC:
+ case R_X86_64_CODE_4_GOTPC32_TLSDESC:
+ rs.handleTlsDesc(R_TLSDESC_PC, R_GOT_PC, type, offset, addend, sym);
+ continue;
+
+ // Misc relocations:
+ case R_X86_64_SIZE32:
+ case R_X86_64_SIZE64:
+ expr = R_SIZE;
+ break;
+
+ default:
+ Err(ctx) << getErrorLoc(ctx, sec.content().data() + offset)
+ << "unknown relocation (" << type.v << ") against symbol "
+ << &sym;
+ continue;
+ }
+ rs.process(expr, type, offset, sym, addend);
+ }
+
+ if (ctx.arg.branchToBranch)
+ llvm::stable_sort(sec.relocs(),
+ [](auto &l, auto &r) { return l.offset < r.offset; });
+}
+
+void X86_64::scanSection(InputSectionBase &sec) {
+ if (ctx.arg.is64)
+ elf::scanSection1<X86_64, ELF64LE>(*this, sec);
+ else // ilp32
+ elf::scanSection1<X86_64, ELF32LE>(*this, sec);
+}
+
void elf::setX86_64TargetInfo(Ctx &ctx) {
if (ctx.arg.zRetpolineplt) {
if (ctx.arg.zNow)
diff --git a/lld/ELF/RelocScan.h b/lld/ELF/RelocScan.h
index 01750431b7468..79f04e0448a59 100644
--- a/lld/ELF/RelocScan.h
+++ b/lld/ELF/RelocScan.h
@@ -66,8 +66,100 @@ class RelocScan {
uint64_t relOff) const;
void process(RelExpr expr, RelType type, uint64_t offset, Symbol &sym,
int64_t addend) const;
+ // Process relocation after needsGot/needsPlt flags are already handled.
+ void processAux(RelExpr expr, RelType type, uint64_t offset, Symbol &sym,
+ int64_t addend) const;
unsigned handleTlsRelocation(RelExpr expr, RelType type, uint64_t offset,
Symbol &sym, int64_t addend);
+
+ // Handle R_PC relocations. These are the most common relocation type, so we
+ // inline the isStaticLinkTimeConstant check.
+ void scanPCRel(RelType type, uint64_t offset, int64_t addend, Symbol &sym) {
+ if (LLVM_UNLIKELY(sym.isGnuIFunc()))
+ sym.setFlags(HAS_DIRECT_RELOC);
+ if (!sym.isPreemptible && (!ctx.arg.isPic || !isAbsoluteOrTls(sym))) {
+ sec->addReloc({R_PC, type, offset, addend, &sym});
+ return;
+ }
+ processAux(R_PC, type, offset, sym, addend);
+ }
+
+ // Handle R_PLT_PC relocations. These are very common (calls/branches), so we
+ // inline the isStaticLinkTimeConstant check. Non-preemptible symbols are
+ // optimized to R_PC (direct call).
+ void scanPlt(RelType type, uint64_t offset, int64_t addend, Symbol &sym) {
+ if (LLVM_UNLIKELY(sym.isGnuIFunc())) {
+ process(R_PLT_PC, type, offset, sym, addend);
+ return;
+ }
+ if (sym.isPreemptible) {
+ sym.setFlags(NEEDS_PLT);
+ sec->addReloc({R_PLT_PC, type, offset, addend, &sym});
+ } else if (!ctx.arg.isPic || !isAbsoluteOrTls(sym)) {
+ sec->addReloc({R_PC, type, offset, addend, &sym});
+ } else {
+ processAux(R_PC, type, offset, sym, addend);
+ }
+ }
+
+ // Handle TLS Initial-Exec relocation.
+ void handleTlsIe(RelExpr ieExpr, RelType type, uint64_t offset,
+ int64_t addend, Symbol &sym) {
+ ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+ if (!ctx.arg.shared && !sym.isPreemptible) {
+ sec->addReloc({R_TPREL, type, offset, addend, &sym});
+ } else {
+ sym.setFlags(NEEDS_TLSIE);
+ sec->addReloc({ieExpr, type, offset, addend, &sym});
+ }
+ }
+
+ // Handle TLS Local-Dynamic relocation. Returns true if the __tls_get_addr
+ // call should be skipped (i.e., caller should ++it).
+ bool handleTlsLd(RelExpr sharedExpr, RelType type, uint64_t offset,
+ int64_t addend, Symbol &sym) {
+ if (ctx.arg.shared) {
+ ctx.needsTlsLd.store(true, std::memory_order_relaxed);
+ sec->addReloc({sharedExpr, type, offset, addend, &sym});
+ return false;
+ }
+ sec->addReloc({R_TPREL, type, offset, addend, &sym});
+ return true;
+ }
+
+ // Handle TLS General-Dynamic relocation. Returns true if the __tls_get_addr
+ // call should be skipped (i.e., caller should ++it).
+ bool handleTlsGd(RelExpr sharedExpr, RelExpr ieExpr, RelExpr leExpr,
+ RelType type, uint64_t offset, int64_t addend, Symbol &sym) {
+ if (ctx.arg.shared) {
+ sym.setFlags(NEEDS_TLSGD);
+ sec->addReloc({sharedExpr, type, offset, addend, &sym});
+ return false;
+ }
+ if (sym.isPreemptible) {
+ ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+ sym.setFlags(NEEDS_TLSIE);
+ sec->addReloc({ieExpr, type, offset, addend, &sym});
+ } else {
+ sec->addReloc({leExpr, type, offset, addend, &sym});
+ }
+ return true;
+ }
+
+ // Handle TLSDESC relocation.
+ void handleTlsDesc(RelExpr sharedExpr, RelExpr ieExpr, RelType type,
+ uint64_t offset, int64_t addend, Symbol &sym) {
+ if (ctx.arg.shared) {
+ sym.setFlags(NEEDS_TLSDESC);
+ sec->addReloc({sharedExpr, type, offset, addend, &sym});
+ } else if (sym.isPreemptible) {
+ ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+ sym.setFlags(NEEDS_TLSIE);
+ sec->addReloc({ieExpr, type, offset, addend, &sym});
+ } else {
+ sec->addReloc({R_TPREL, type, offset, addend, &sym});
+ }
+ }
};
template <class ELFT, class RelTy>
@@ -99,8 +191,7 @@ void RelocScan::scan(typename Relocs<RelTy>::const_iterator &it, RelType type,
// Ensure GOT or GOTPLT is created for relocations that reference their base
// addresses without directly creating entries.
- if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_PLT_GOTPLT,
- R_TLSDESC_GOTPLT, R_TLSGD_GOTPLT>(expr)) {
+ if (oneof<R_GOTPLTREL, R_GOTPLT, R_TLSGD_GOTPLT>(expr)) {
ctx.in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
} else if (oneof<R_GOTONLY_PC, R_GOTREL, RE_PPC32_PLTREL>(expr)) {
ctx.in.got->hasGotOffRel.store(true, std::memory_order_relaxed);
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 796c346b2655d..bc339756da481 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -122,7 +122,7 @@ bool elf::isAbsolute(const Symbol &sym) {
return false;
}
-static bool isAbsoluteOrTls(const Symbol &sym) {
+bool elf::isAbsoluteOrTls(const Symbol &sym) {
return isAbsolute(sym) || sym.isTls();
}
@@ -973,6 +973,16 @@ void RelocScan::process(RelExpr expr, RelType type, uint64_t offset,
sym.setFlags(HAS_DIRECT_RELOC);
}
+ processAux(expr, type, offset, sym, addend);
+}
+
+// Process relocation after needsGot/needsPlt flags are already handled.
+// This is the bottom half of process(), handling isStaticLinkTimeConstant
+// check, dynamic relocations, copy relocations, and error reporting.
+void RelocScan::processAux(RelExpr expr, RelType type, uint64_t offset,
+ Symbol &sym, int64_t addend) const {
+ const bool isIfunc = sym.isGnuIFunc();
+
// If the relocation is known to be a link-time constant, we know no dynamic
// relocation will be created, pass the control to relocateAlloc() or
// relocateNonAlloc() to resolve it.
@@ -1310,7 +1320,7 @@ unsigned RelocScan::handleTlsRelocation(RelExpr expr, RelType type,
sec->addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
} else if (expr != R_TLSIE_HINT) {
sym.setFlags(NEEDS_TLSIE);
- // R_GOT needs a relative relocation for PIC on i386 and Hexagon.
+ // R_GOT needs a relative relocation for PIC on Hexagon.
if (expr == R_GOT && ctx.arg.isPic &&
!ctx.target->usesOnlyLowPageBits(type))
addRelativeReloc<true>(ctx, *sec, offset, sym, addend, expr, type);
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 680eb66e3356a..f9964a98a58ce 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -175,6 +175,7 @@ void hexagonTLSSymbolUpdate(Ctx &ctx);
bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections);
bool isAbsolute(const Symbol &sym);
+bool isAbsoluteOrTls(const Symbol &sym);
class ThunkSection;
class Thunk;
More information about the llvm-commits
mailing list