[lld] Reapply "ELF: Add branch-to-branch optimization." (PR #145579)
Peter Collingbourne via llvm-commits
llvm-commits@lists.llvm.org
Tue Jun 24 12:52:51 PDT 2025
https://github.com/pcc created https://github.com/llvm/llvm-project/pull/145579
Fixed an assertion failure when reading .eh_frame sections, and added
.eh_frame sections to the tests.
This reverts commit 1e95349dbe329938d2962a78baa0ec421e9cd7d1.
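
For context, here is a minimal illustrative sketch of the optimization (not
part of the patch; the labels and layout are hypothetical):

  .globl _start, f1, f2
  _start:
    bl f1            // relocated with R_AARCH64_CALL26 f1
  f1:
    b f2             // relocated with R_AARCH64_JUMP26 f2
  f2:
    ret

  ## With --branch-to-branch the linker follows f1's unconditional branch and
  ## retargets the CALL26 relocation at f2, so the output contains `bl f2`.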
From cd48453ba62329ed91466707973d8f8ec99dc506 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Tue, 24 Jun 2025 12:52:37 -0700
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.6-beta.1
---
lld/ELF/Arch/AArch64.cpp | 58 ++++++++++
lld/ELF/Arch/TargetImpl.h | 93 ++++++++++++++++
lld/ELF/Arch/X86_64.cpp | 68 ++++++++++++
lld/ELF/Config.h | 1 +
lld/ELF/Driver.cpp | 2 +
lld/ELF/InputSection.cpp | 5 +-
lld/ELF/Options.td | 4 +
lld/ELF/Relocations.cpp | 8 +-
lld/ELF/Target.h | 1 +
lld/docs/ReleaseNotes.rst | 4 +
lld/docs/ld.lld.1 | 9 +-
lld/test/ELF/aarch64-branch-to-branch.s | 84 +++++++++++++++
lld/test/ELF/x86-64-branch-to-branch.s | 135 ++++++++++++++++++++++++
13 files changed, 466 insertions(+), 6 deletions(-)
create mode 100644 lld/ELF/Arch/TargetImpl.h
create mode 100644 lld/test/ELF/aarch64-branch-to-branch.s
create mode 100644 lld/test/ELF/x86-64-branch-to-branch.s
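
As background for the x86-64 addend handling below, a hypothetical worked
example (not part of the patch) of the effective-addend arithmetic for
R_X86_64_PLT32:

  .globl f, _start
  f:
    ret
  _start:
    jmp f            # assembles to e9 <rel32> with R_X86_64_PLT32 f-0x4

  ## rel32 resolves to S + A - P, and the CPU branches to the end of the
  ## instruction plus rel32, i.e. (P + 4) + (S + A - P) = S + A + 4. With
  ## A = -4 this is exactly f, so the effective addend is A + 4 = 0. Branches
  ## to local labels may instead be written against the STT_SECTION symbol
  ## with A = offset - 4, making the effective addend the label's offset
  ## within its section.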
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 8a225ed103eef..1812f2af419d2 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -11,6 +11,7 @@
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
+#include "TargetImpl.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"
@@ -82,6 +83,7 @@ class AArch64 : public TargetInfo {
uint64_t val) const override;
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
+ void applyBranchToBranchOpt() const override;
private:
void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
@@ -974,6 +976,62 @@ void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
}
}
+static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
+ Relocation &r) {
+ // Identify a control transfer relocation for the branch-to-branch
+  // optimization. A "control transfer relocation" usually means a B or BL
+  // target, but it also includes relative vtable relocations, for example.
+ //
+ // We require the relocation type to be JUMP26, CALL26 or PLT32. With a
+ // relocation type of PLT32 the value may be assumed to be used for branching
+ // directly to the symbol and the addend is only used to produce the relocated
+ // value (hence the effective addend is always 0). This is because if a PLT is
+ // needed the addend will be added to the address of the PLT, and it doesn't
+ // make sense to branch into the middle of a PLT. For example, relative vtable
+ // relocations use PLT32 and 0 or a positive value as the addend but still are
+ // used to branch to the symbol.
+ //
+ // With JUMP26 or CALL26 the only reasonable interpretation of a non-zero
+ // addend is that we are branching to symbol+addend so that becomes the
+ // effective addend.
+ if (r.type == R_AARCH64_PLT32)
+ return 0;
+ if (r.type == R_AARCH64_JUMP26 || r.type == R_AARCH64_CALL26)
+ return r.addend;
+ return std::nullopt;
+}
+
+static std::pair<Relocation *, uint64_t>
+getBranchInfoAtTarget(InputSection &is, uint64_t offset) {
+ auto *i = llvm::partition_point(
+ is.relocations, [&](Relocation &r) { return r.offset < offset; });
+ if (i != is.relocations.end() && i->offset == offset &&
+ i->type == R_AARCH64_JUMP26) {
+ return {i, i->addend};
+ }
+ return {nullptr, 0};
+}
+
+static void redirectControlTransferRelocations(Relocation &r1,
+ const Relocation &r2) {
+ r1.expr = r2.expr;
+ r1.sym = r2.sym;
+ // With PLT32 we must respect the original addend as that affects the value's
+ // interpretation. With the other relocation types the original addend is
+ // irrelevant because it referred to an offset within the original target
+ // section so we overwrite it.
+ if (r1.type == R_AARCH64_PLT32)
+ r1.addend += r2.addend;
+ else
+ r1.addend = r2.addend;
+}
+
+void AArch64::applyBranchToBranchOpt() const {
+ applyBranchToBranchOptImpl(ctx, getControlTransferAddend,
+ getBranchInfoAtTarget,
+ redirectControlTransferRelocations);
+}
+
// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
// Indicator (BTI) introduced in armv8.5-a. The additional instructions used
diff --git a/lld/ELF/Arch/TargetImpl.h b/lld/ELF/Arch/TargetImpl.h
new file mode 100644
index 0000000000000..90a2b53cbb2af
--- /dev/null
+++ b/lld/ELF/Arch/TargetImpl.h
@@ -0,0 +1,93 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_ARCH_TARGETIMPL_H
+#define LLD_ELF_ARCH_TARGETIMPL_H
+
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "Relocations.h"
+#include "Symbols.h"
+#include "llvm/BinaryFormat/ELF.h"
+
+namespace lld::elf {
+
+// getControlTransferAddend: If this relocation is used for control transfer
+// instructions (e.g. branch, branch-link or call) or code references (e.g.
+// virtual function pointers) and indicates an address-insignificant reference,
+// return the effective addend for the relocation, otherwise return
+// std::nullopt. The effective addend for a relocation is the addend that is
+// used to determine its branch destination.
+//
+// getBranchInfoAtTarget: If a control transfer relocation referring to
+// is+offset directly transfers control to a relocated branch instruction in the
+// specified section, return the relocation for the branch target as well as its
+// effective addend (see above). Otherwise return {nullptr, 0}.
+//
+// redirectControlTransferRelocations: Given r1, a relocation for which
+// getControlTransferAddend() returned a value, and r2, a relocation returned
+// by getBranchInfoAtTarget(), modify r1 to branch directly to the target of r2.
+template <typename GetControlTransferAddend, typename GetBranchInfoAtTarget,
+ typename RedirectControlTransferRelocations>
+inline void applyBranchToBranchOptImpl(
+ Ctx &ctx, GetControlTransferAddend getControlTransferAddend,
+ GetBranchInfoAtTarget getBranchInfoAtTarget,
+ RedirectControlTransferRelocations redirectControlTransferRelocations) {
+ // Needs to run serially because it writes to the relocations array as well as
+ // reading relocations of other sections.
+ for (ELFFileBase *f : ctx.objectFiles) {
+ auto getRelocBranchInfo =
+ [&getBranchInfoAtTarget](
+ Relocation &r,
+ uint64_t addend) -> std::pair<Relocation *, uint64_t> {
+ auto *target = dyn_cast_or_null<Defined>(r.sym);
+ // We don't allow preemptible symbols or ifuncs (may go somewhere else),
+ // absolute symbols (runtime behavior unknown), non-executable or writable
+ // memory (ditto) or non-regular sections (no section data).
+ if (!target || target->isPreemptible || target->isGnuIFunc() ||
+ !target->section ||
+ !(target->section->flags & llvm::ELF::SHF_EXECINSTR) ||
+ (target->section->flags & llvm::ELF::SHF_WRITE) ||
+ target->section->kind() != SectionBase::Regular)
+ return {nullptr, 0};
+ return getBranchInfoAtTarget(*cast<InputSection>(target->section),
+ target->value + addend);
+ };
+ for (InputSectionBase *sb : f->getSections()) {
+ auto *s = dyn_cast_or_null<InputSection>(sb);
+ if (!s)
+ continue;
+ for (Relocation &r : s->relocations) {
+ std::optional<uint64_t> addend = getControlTransferAddend(*s, r);
+ if (!addend)
+ continue;
+ std::pair<Relocation *, uint64_t> targetAndAddend =
+ getRelocBranchInfo(r, *addend);
+ if (!targetAndAddend.first)
+ continue;
+ // Avoid getting stuck in an infinite loop if we encounter a branch
+ // that (possibly indirectly) branches to itself. It is unlikely
+ // that more than 5 iterations will ever be needed in practice.
+ size_t iterations = 5;
+ while (iterations--) {
+ std::pair<Relocation *, uint64_t> nextTargetAndAddend =
+ getRelocBranchInfo(*targetAndAddend.first,
+ targetAndAddend.second);
+ if (!nextTargetAndAddend.first)
+ break;
+ targetAndAddend = nextTargetAndAddend;
+ }
+ redirectControlTransferRelocations(r, *targetAndAddend.first);
+ }
+ }
+ }
+}
+
+} // namespace lld::elf
+
+#endif
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 974da4d96320a..163505102d0ec 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -11,6 +11,7 @@
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
+#include "TargetImpl.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MathExtras.h"
@@ -49,6 +50,7 @@ class X86_64 : public TargetInfo {
bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
InputSection *nextIS) const override;
bool relaxOnce(int pass) const override;
+ void applyBranchToBranchOpt() const override;
private:
void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
@@ -1161,6 +1163,72 @@ void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
}
}
+static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
+ Relocation &r) {
+ // Identify a control transfer relocation for the branch-to-branch
+ // optimization. A "control transfer relocation" usually means a CALL or JMP
+  // target, but it also includes relative vtable relocations, for example.
+ //
+ // We require the relocation type to be PLT32. With a relocation type of PLT32
+ // the value may be assumed to be used for branching directly to the symbol
+ // and the addend is only used to produce the relocated value (hence the
+ // effective addend is always 0). This is because if a PLT is needed the
+ // addend will be added to the address of the PLT, and it doesn't make sense
+ // to branch into the middle of a PLT. For example, relative vtable
+ // relocations use PLT32 and 0 or a positive value as the addend but still are
+ // used to branch to the symbol.
+ //
+ // STT_SECTION symbols are a special case on x86 because the LLVM assembler
+ // uses them for branches to local symbols which are assembled as referring to
+ // the section symbol with the addend equal to the symbol value - 4.
+ if (r.type == R_X86_64_PLT32) {
+ if (r.sym->isSection())
+ return r.addend + 4;
+ return 0;
+ }
+ return std::nullopt;
+}
+
+static std::pair<Relocation *, uint64_t>
+getBranchInfoAtTarget(InputSection &is, uint64_t offset) {
+ auto content = is.contentMaybeDecompress();
+ if (content.size() > offset && content[offset] == 0xe9) { // JMP immediate
+ auto *i = llvm::partition_point(
+ is.relocations, [&](Relocation &r) { return r.offset < offset + 1; });
+ // Unlike with getControlTransferAddend() it is valid to accept a PC32
+ // relocation here because we know that this is actually a JMP and not some
+ // other reference, so the interpretation is that we add 4 to the addend and
+ // use that as the effective addend.
+ if (i != is.relocations.end() && i->offset == offset + 1 &&
+ (i->type == R_X86_64_PC32 || i->type == R_X86_64_PLT32)) {
+ return {i, i->addend + 4};
+ }
+ }
+ return {nullptr, 0};
+}
+
+static void redirectControlTransferRelocations(Relocation &r1,
+ const Relocation &r2) {
+ // The isSection() check handles the STT_SECTION case described above.
+ // In that case the original addend is irrelevant because it referred to an
+ // offset within the original target section so we overwrite it.
+ //
+ // The +4 is here to compensate for r2.addend which will likely be -4,
+ // but may also be addend-4 in case of a PC32 branch to symbol+addend.
+ if (r1.sym->isSection())
+ r1.addend = r2.addend;
+ else
+ r1.addend += r2.addend + 4;
+ r1.expr = r2.expr;
+ r1.sym = r2.sym;
+}
+
+void X86_64::applyBranchToBranchOpt() const {
+ applyBranchToBranchOptImpl(ctx, getControlTransferAddend,
+ getBranchInfoAtTarget,
+ redirectControlTransferRelocations);
+}
+
// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
// entries containing endbr64 instructions. A PLT entry will be split into two
// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 2b72d54ba410d..88bda41d36487 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -302,6 +302,7 @@ struct Config {
bool bpFunctionOrderForCompression = false;
bool bpDataOrderForCompression = false;
bool bpVerboseSectionOrderer = false;
+ bool branchToBranch = false;
bool checkSections;
bool checkDynamicRelocs;
std::optional<llvm::DebugCompressionType> compressDebugSections;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 7e132a387a04d..1e0b5988343a6 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1644,6 +1644,8 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
ctx.arg.zWxneeded = hasZOption(args, "wxneeded");
setUnresolvedSymbolPolicy(ctx, args);
ctx.arg.power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no";
+ ctx.arg.branchToBranch = args.hasFlag(
+ OPT_branch_to_branch, OPT_no_branch_to_branch, ctx.arg.optimize >= 2);
if (opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) {
if (arg->getOption().matches(OPT_eb))
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 0ce0f08d03874..f8786265029e8 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -430,8 +430,9 @@ InputSectionBase *InputSection::getRelocatedSection() const {
template <class ELFT, class RelTy>
void InputSection::copyRelocations(Ctx &ctx, uint8_t *buf) {
- if (ctx.arg.relax && !ctx.arg.relocatable &&
- (ctx.arg.emachine == EM_RISCV || ctx.arg.emachine == EM_LOONGARCH)) {
+ bool linkerRelax =
+ ctx.arg.relax && is_contained({EM_RISCV, EM_LOONGARCH}, ctx.arg.emachine);
+ if (!ctx.arg.relocatable && (linkerRelax || ctx.arg.branchToBranch)) {
// On LoongArch and RISC-V, relaxation might change relocations: copy
// from internal ones that are updated by relaxation.
InputSectionBase *sec = getRelocatedSection();
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c795147eb9662..d7e3313167007 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -59,6 +59,10 @@ def build_id: J<"build-id=">, HelpText<"Generate build ID note">,
MetaVarName<"[fast,md5,sha1,uuid,0x<hexstring>]">;
def : F<"build-id">, Alias<build_id>, AliasArgs<["sha1"]>, HelpText<"Alias for --build-id=sha1">;
+defm branch_to_branch: BB<"branch-to-branch",
+ "Enable branch-to-branch optimization (default at -O2)",
+ "Disable branch-to-branch optimization (default at -O0 and -O1)">;
+
defm check_sections: B<"check-sections",
"Check section addresses for overlaps (default)",
"Do not check section addresses for overlaps">;
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 6c4209a2b81ed..43f19186f0981 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1665,9 +1665,10 @@ void RelocationScanner::scan(Relocs<RelTy> rels) {
}
// Sort relocations by offset for more efficient searching for
- // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64.
+ // R_RISCV_PCREL_HI20, R_PPC64_ADDR64 and the branch-to-branch optimization.
if (ctx.arg.emachine == EM_RISCV ||
- (ctx.arg.emachine == EM_PPC64 && sec->name == ".toc"))
+ (ctx.arg.emachine == EM_PPC64 && sec->name == ".toc") ||
+ ctx.arg.branchToBranch)
llvm::stable_sort(sec->relocs(),
[](const Relocation &lhs, const Relocation &rhs) {
return lhs.offset < rhs.offset;
@@ -1958,6 +1959,9 @@ void elf::postScanRelocations(Ctx &ctx) {
for (ELFFileBase *file : ctx.objectFiles)
for (Symbol *sym : file->getLocalSymbols())
fn(*sym);
+
+ if (ctx.arg.branchToBranch)
+ ctx.target->applyBranchToBranchOpt();
}
static bool mergeCmp(const InputSection *a, const InputSection *b) {
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index fd1e5d33c438a..6dd20b2f0cbaa 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -101,6 +101,7 @@ class TargetInfo {
virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
JumpModType val) const {}
+ virtual void applyBranchToBranchOpt() const {}
virtual ~TargetInfo();
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 064ed0828c31f..dabfc961dd5ba 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -62,6 +62,10 @@ ELF Improvements
on executable sections.
(`#128883 <https://github.com/llvm/llvm-project/pull/128883>`_)
+* For AArch64 and X86_64, added ``--branch-to-branch``, which rewrites branches
+ that point to another branch instruction to instead branch directly to the
+  target of that branch. Enabled by default at ``-O2``.
+
Breaking changes
----------------
* Executable-only and readable-executable sections are now allowed to be placed
diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index cfacdb081a807..7edc522b4f6a4 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -93,6 +93,11 @@ Bind default visibility defined STB_GLOBAL function symbols locally for
.Fl shared.
.It Fl -be8
Write a Big Endian ELF File using BE8 format(AArch32 only)
+.It Fl -branch-to-branch
+Enable the branch-to-branch optimization: a branch whose target is
+another branch instruction is rewritten to point directly to that
+branch's target (AArch64 and X86_64 only). Enabled by default at
+.Fl O2 Ns .
.It Fl -build-id Ns = Ns Ar value
Generate a build ID note.
.Ar value
@@ -414,7 +419,7 @@ If not specified,
.Dv a.out
is used as a default.
.It Fl O Ns Ar value
-Optimize output file size.
+Optimize output file.
.Ar value
may be:
.Pp
@@ -424,7 +429,7 @@ Disable string merging.
.It Cm 1
Enable string merging.
.It Cm 2
-Enable string tail merging.
+Enable string tail merging and branch-to-branch optimization.
.El
.Pp
.Fl O Ns Cm 1
diff --git a/lld/test/ELF/aarch64-branch-to-branch.s b/lld/test/ELF/aarch64-branch-to-branch.s
new file mode 100644
index 0000000000000..c970fe308579d
--- /dev/null
+++ b/lld/test/ELF/aarch64-branch-to-branch.s
@@ -0,0 +1,84 @@
+# REQUIRES: aarch64
+
+## Test that the branch-to-branch optimization follows the links
+## from f1 -> f2 -> f3 and updates all references to point to f3.
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux %s -o %t.o
+# RUN: ld.lld %t.o -o %t --branch-to-branch --emit-relocs
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s
+# RUN: llvm-objdump -r %t | FileCheck --check-prefixes=RELOC,B2B-RELOC %s
+# RUN: ld.lld %t.o -o %t -O2 --emit-relocs
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s
+# RUN: llvm-objdump -r %t | FileCheck --check-prefixes=RELOC,B2B-RELOC %s
+
+## Test that branch-to-branch is disabled by default.
+
+# RUN: ld.lld %t.o -o %t --emit-relocs
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
+# RUN: llvm-objdump -r %t | FileCheck --check-prefixes=RELOC,NOB2B-RELOC %s
+# RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch --emit-relocs
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
+# RUN: llvm-objdump -r %t | FileCheck --check-prefixes=RELOC,NOB2B-RELOC %s
+
+## Test that branch-to-branch is disabled for preemptible symbols.
+
+# RUN: ld.lld %t.o -o %t --branch-to-branch -shared --emit-relocs
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
+# RUN: llvm-objdump -r %t | FileCheck --check-prefixes=RELOC,NOB2B-RELOC %s
+
+.section .rodata.vtable,"a"
+.globl vtable
+vtable:
+# B2B: Contents of section .rodata:
+# RELOC: RELOCATION RECORDS FOR [.rodata]:
+# RELOC-NEXT: OFFSET
+# B2B-NEXT: [[VF:[0-9a-f]{8}]]
+# B2B-RELOC-NEXT: R_AARCH64_PLT32 f3
+# NOB2B-RELOC-NEXT: R_AARCH64_PLT32 f1
+.4byte f1 at PLT - vtable
+# B2B-SAME: [[VF]]
+# B2B-RELOC-NEXT: R_AARCH64_PLT32 f3+0x4
+# NOB2B-RELOC-NEXT: R_AARCH64_PLT32 f2+0x4
+.4byte f2 at PLT - vtable
+# B2B-SAME: [[VF]]
+# RELOC-NEXT: R_AARCH64_PLT32 f3+0x8
+.4byte f3 at PLT - vtable
+
+.section .text._start,"ax"
+.globl _start
+# CHECK: <_start>:
+# RELOC: RELOCATION RECORDS FOR [.text]:
+# RELOC-NEXT: OFFSET
+_start:
+.cfi_startproc
+# B2B: bl {{.*}} <f3>
+# B2B-RELOC-NEXT: R_AARCH64_CALL26 f3
+# NOB2B: bl {{.*}} <f1{{.*}}>
+# NOB2B-RELOC-NEXT: R_AARCH64_CALL26 f1
+bl f1
+# B2B: b {{.*}} <f3>
+# B2B-RELOC-NEXT: R_AARCH64_JUMP26 f3
+# NOB2B: b {{.*}} <f2{{.*}}>
+# NOB2B-RELOC-NEXT: R_AARCH64_JUMP26 f2
+b f2
+.cfi_endproc
+
+.section .text.f1,"ax"
+.globl f1
+f1:
+# B2B-RELOC-NEXT: R_AARCH64_JUMP26 f3
+# NOB2B-RELOC-NEXT: R_AARCH64_JUMP26 f2
+b f2
+
+.section .text.f2,"ax"
+.globl f2
+# CHECK: <f2>:
+f2:
+# CHECK-NEXT: b {{.*}} <f3{{.*}}>
+# RELOC-NEXT: R_AARCH64_JUMP26 f3
+b f3
+
+.section .text.f3,"ax"
+.globl f3
+f3:
+ret
diff --git a/lld/test/ELF/x86-64-branch-to-branch.s b/lld/test/ELF/x86-64-branch-to-branch.s
new file mode 100644
index 0000000000000..28ee5cad0b461
--- /dev/null
+++ b/lld/test/ELF/x86-64-branch-to-branch.s
@@ -0,0 +1,135 @@
+# REQUIRES: x86
+
+## Test that the branch-to-branch optimization follows the links
+## from f1 -> f2 -> f3 and updates all references to point to f3.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+# RUN: ld.lld %t.o -o %t --branch-to-branch --emit-relocs
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s
+# RUN: llvm-objdump -r %t | FileCheck --check-prefixes=RELOC,B2B-RELOC %s
+# RUN: ld.lld %t.o -o %t -O2 --emit-relocs
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s
+# RUN: llvm-objdump -r %t | FileCheck --check-prefixes=RELOC,B2B-RELOC %s
+
+## Test that branch-to-branch is disabled by default.
+
+# RUN: ld.lld %t.o -o %t --emit-relocs
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
+# RUN: llvm-objdump -r %t | FileCheck --check-prefixes=RELOC,NOB2B-RELOC %s
+# RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch --emit-relocs
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
+# RUN: llvm-objdump -r %t | FileCheck --check-prefixes=RELOC,NOB2B-RELOC %s
+
+## Test that branch-to-branch is disabled for preemptible symbols.
+
+# RUN: ld.lld %t.o -o %t --branch-to-branch -shared --emit-relocs
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
+# RUN: llvm-objdump -r %t | FileCheck --check-prefixes=RELOC,NOB2B-RELOC %s
+
+.section .rodata.vtable,"a"
+.globl vtable
+vtable:
+# B2B: Contents of section .rodata:
+# RELOC: RELOCATION RECORDS FOR [.rodata]:
+# RELOC-NEXT: OFFSET
+# B2B-NEXT: [[VF:[0-9a-f]{8}]]
+# B2B-RELOC-NEXT: R_X86_64_PLT32 f3
+# NOB2B-RELOC-NEXT: R_X86_64_PLT32 f1
+.4byte f1 at PLT - vtable
+# B2B-SAME: [[VF]]
+# B2B-RELOC-NEXT: R_X86_64_PLT32 f3+0x4
+# NOB2B-RELOC-NEXT: R_X86_64_PLT32 f2+0x4
+.4byte f2 at PLT - vtable
+# B2B-SAME: [[VF]]
+# RELOC-NEXT: R_X86_64_PLT32 f3+0x8
+.4byte f3 at PLT - vtable
+
+# For .rodata.f6
+# B2B-RELOC-NEXT: R_X86_64_PLT32 f3-0x4
+
+.section .text._start,"ax"
+.globl _start
+# CHECK: <_start>:
+# RELOC: RELOCATION RECORDS FOR [.text]:
+# RELOC-NEXT: OFFSET
+_start:
+.cfi_startproc
+# B2B-NEXT: jmp {{.*}} <f3>
+# B2B-RELOC-NEXT: R_X86_64_PLT32 f3-0x4
+# NOB2B-NEXT: jmp {{.*}} <f1{{.*}}>
+# NOB2B-RELOC-NEXT: R_X86_64_PLT32 f1-0x4
+jmp f1
+# B2B-NEXT: jmp {{.*}} <f3>
+# B2B-RELOC-NEXT: R_X86_64_PLT32 f3-0x4
+# NOB2B-NEXT: jmp {{.*}} <f2{{.*}}>
+# NOB2B-RELOC-NEXT: R_X86_64_PLT32 f2-0x4
+jmp f2
+# This will assemble to a relocation against the STT_SECTION symbol for
+# .text.f4 with an addend, which looks similar to the relative vtable cases
+# above but requires different handling of the addend so that we don't think
+# this is branching to the `jmp f3` at the start of the target section.
+# CHECK-NEXT: jmp {{.*}} <f4{{.*}}>
+# RELOC-NEXT: R_X86_64_PLT32 .text+0x2e
+jmp f4
+# B2B-NEXT: jmp 0x[[IPLT:[0-9a-f]*]]
+# RELOC-NEXT: R_X86_64_PLT32 f5-0x4
+jmp f5
+# B2B-NEXT: jmp {{.*}} <f6>
+# RELOC-NEXT: R_X86_64_PLT32 f6-0x4
+jmp f6
+# B2B-NEXT: jmp {{.*}} <f7>
+# RELOC-NEXT: R_X86_64_PLT32 f7-0x4
+jmp f7
+.cfi_endproc
+
+.section .text.f1,"ax"
+.globl f1
+f1:
+# B2B-RELOC-NEXT: R_X86_64_PLT32 f3-0x4
+# NOB2B-RELOC-NEXT: R_X86_64_PLT32 f2-0x4
+jmp f2
+
+.section .text.f2,"ax"
+.globl f2
+# CHECK: <f2>:
+f2:
+# CHECK-NEXT: jmp {{.*}} <f3{{.*}}>
+# RELOC-NEXT: R_X86_64_PLT32 f3-0x4
+jmp f3
+
+.section .text.f3,"ax"
+.globl f3
+f3:
+# Test that a self-branch doesn't trigger an infinite loop.
+# RELOC-NEXT: R_X86_64_PLT32 f3-0x4
+jmp f3
+
+.section .text.f4,"ax"
+jmp f3
+f4:
+ret
+
+.section .text.f5,"ax"
+.type f5, @gnu_indirect_function
+.globl f5
+f5:
+# RELOC-NEXT: R_X86_64_PLT32 f3-0x4
+jmp f3
+
+.section .rodata.f6,"a"
+.globl f6
+f6:
+# RELOC-NEXT: R_X86_64_PLT32 f3-0x4
+jmp f3
+
+# RELOC: RELOCATION RECORDS FOR [.wtext.f7]:
+# RELOC-NEXT: OFFSET
+
+.section .wtext.f7,"awx"
+.globl f7
+f7:
+# RELOC-NEXT: R_X86_64_PLT32 f3-0x4
+jmp f3
+
+# B2B: <.iplt>:
+# B2B-NEXT: [[IPLT]]:
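
For anyone who wants to try this locally, a minimal sketch mirroring the RUN
lines above (file names are placeholders):

  llvm-mc -filetype=obj -triple=x86_64-pc-linux x86-64-branch-to-branch.s -o t.o
  ld.lld t.o -o t --branch-to-branch --emit-relocs
  llvm-objdump -d -r t   # branch sites and relocations now refer to f3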