[lld] ELF: Add branch-to-branch optimization. (PR #138366)

Peter Collingbourne via llvm-commits llvm-commits at lists.llvm.org
Thu May 8 21:59:20 PDT 2025


https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/138366

>From 03060849dc81f83ec48f05995ac8fd6df846c25b Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter at pcc.me.uk>
Date: Fri, 2 May 2025 16:57:28 -0700
Subject: [PATCH 1/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.6-beta.1
---
 lld/ELF/Arch/AArch64.cpp                | 58 +++++++++++++++++
 lld/ELF/Arch/TargetImpl.h               | 87 +++++++++++++++++++++++++
 lld/ELF/Arch/X86_64.cpp                 | 54 +++++++++++++++
 lld/ELF/Config.h                        |  1 +
 lld/ELF/Driver.cpp                      |  2 +
 lld/ELF/Options.td                      |  4 ++
 lld/ELF/Relocations.cpp                 |  8 ++-
 lld/ELF/Target.h                        |  1 +
 lld/docs/ld.lld.1                       |  8 ++-
 lld/test/ELF/aarch64-branch-to-branch.s | 58 +++++++++++++++++
 lld/test/ELF/x86-64-branch-to-branch.s  | 58 +++++++++++++++++
 11 files changed, 335 insertions(+), 4 deletions(-)
 create mode 100644 lld/ELF/Arch/TargetImpl.h
 create mode 100644 lld/test/ELF/aarch64-branch-to-branch.s
 create mode 100644 lld/test/ELF/x86-64-branch-to-branch.s

diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 9538dd4a70bae..f3a24bd8a9184 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -11,6 +11,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
+#include "TargetImpl.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Support/Endian.h"
@@ -83,6 +84,7 @@ class AArch64 : public TargetInfo {
                 uint64_t val) const override;
   RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
   void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
+  void applyBranchToBranchOpt() const override;
 
 private:
   void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
@@ -975,6 +977,62 @@ void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
   }
 }
 
+static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
+                                                        Relocation &r) {
+  // Identify a control transfer relocation for the branch-to-branch
+  // optimization. A "control transfer relocation" usually means a B or BL
+  // target but it also includes relative vtable relocations for example.
+  //
+  // We require the relocation type to be JUMP26, CALL26 or PLT32. With a
+  // relocation type of PLT32 the value may be assumed to be used for branching
+  // directly to the symbol and the addend is only used to produce the relocated
+  // value (hence the effective addend is always 0). This is because if a PLT is
+  // needed the addend will be added to the address of the PLT, and it doesn't
+  // make sense to branch into the middle of a PLT. For example, relative vtable
+  // relocations use PLT32 and 0 or a positive value as the addend but still are
+  // used to branch to the symbol.
+  //
+  // With JUMP26 or CALL26 the only reasonable interpretation of a non-zero
+  // addend is that we are branching to symbol+addend so that becomes the
+  // effective addend.
+  if (r.type == R_AARCH64_PLT32)
+    return 0;
+  if (r.type == R_AARCH64_JUMP26 || r.type == R_AARCH64_CALL26)
+    return r.addend;
+  return std::nullopt;
+}
+
+static std::pair<Relocation *, uint64_t> getBranchInfo(InputSection &is,
+                                                       uint64_t offset) {
+  auto *i = std::lower_bound(
+      is.relocations.begin(), is.relocations.end(), offset,
+      [](Relocation &r, uint64_t offset) { return r.offset < offset; });
+  if (i != is.relocations.end() && i->offset == offset &&
+      i->type == R_AARCH64_JUMP26) {
+    return {i, i->addend};
+  }
+  return {nullptr, 0};
+}
+
+static void mergeControlTransferRelocations(Relocation &r1,
+                                            const Relocation &r2) {
+  r1.expr = r2.expr;
+  r1.sym = r2.sym;
+  // With PLT32 we must respect the original addend as that affects the value's
+  // interpretation. With the other relocation types the original addend is
+  // irrelevant because it referred to an offset within the original target
+  // section so we overwrite it.
+  if (r1.type == R_AARCH64_PLT32)
+    r1.addend += r2.addend;
+  else
+    r1.addend = r2.addend;
+}
+
+void AArch64::applyBranchToBranchOpt() const {
+  applyBranchToBranchOptImpl(ctx, getBranchInfo, getControlTransferAddend,
+                             mergeControlTransferRelocations);
+}
+
 // AArch64 may use security features in variant PLT sequences. These are:
 // Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
 // Indicator (BTI) introduced in armv8.5-a. The additional instructions used
diff --git a/lld/ELF/Arch/TargetImpl.h b/lld/ELF/Arch/TargetImpl.h
new file mode 100644
index 0000000000000..bb10749516953
--- /dev/null
+++ b/lld/ELF/Arch/TargetImpl.h
@@ -0,0 +1,87 @@
+//===- TargetImpl.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_ARCH_TARGETIMPL_H
+#define LLD_ELF_ARCH_TARGETIMPL_H
+
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "Relocations.h"
+#include "Symbols.h"
+#include "llvm/BinaryFormat/ELF.h"
+
+namespace lld {
+namespace elf {
+
+// getControlTransferAddend: If this relocation is used for control transfer
+// instructions (e.g. branch, branch-link or call) or code references (e.g.
+// virtual function pointers) and indicates an address-insignificant reference,
+// return the effective addend for the relocation, otherwise return
+// std::nullopt. The effective addend for a relocation is the addend that is
+// used to determine its branch destination.
+//
+// getBranchInfo: If a control transfer relocation referring to is+offset
+// directly transfers control to a relocated branch instruction in the specified
+// section, return the relocation for the branch target as well as its effective
+// addend (see above). Otherwise return {nullptr, 0}.
+//
+// mergeControlTransferRelocations: Given r1, a relocation for which
+// getControlTransferAddend() returned a value, and r2, a relocation returned by
+// getBranchInfo(), modify r1 so that it branches directly to the target of r2.
+template <typename GetBranchInfo, typename GetControlTransferAddend,
+          typename MergeControlTransferRelocations>
+inline void applyBranchToBranchOptImpl(
+    Ctx &ctx, GetBranchInfo getBranchInfo,
+    GetControlTransferAddend getControlTransferAddend,
+    MergeControlTransferRelocations mergeControlTransferRelocations) {
+  // Needs to run serially because it writes to the relocations array as well as
+  // reading relocations of other sections.
+  for (ELFFileBase *f : ctx.objectFiles) {
+    auto getRelocBranchInfo =
+        [&ctx, &getBranchInfo](Relocation &r,
+               uint64_t addend) -> std::pair<Relocation *, uint64_t> {
+      auto *target = dyn_cast_or_null<Defined>(r.sym);
+      // We don't allow preemptible symbols (may go somewhere else),
+      // absolute symbols (runtime behavior unknown), non-executable memory
+      // (ditto) or non-regular sections (no section data).
+      if (!target || target->isPreemptible || !target->section ||
+          !(target->section->flags & llvm::ELF::SHF_EXECINSTR) ||
+          target->section->kind() != SectionBase::Regular)
+        return {nullptr, 0};
+      return getBranchInfo(*cast<InputSection>(target->section),
+                                       target->value + addend);
+    };
+    for (InputSectionBase *s : f->getSections()) {
+      if (!s)
+        continue;
+      for (Relocation &r : s->relocations) {
+        if (std::optional<uint64_t> addend =
+                getControlTransferAddend(*cast<InputSection>(s),
+                                                     r)) {
+          std::pair<Relocation *, uint64_t> targetAndAddend =
+              getRelocBranchInfo(r, *addend);
+          if (targetAndAddend.first) {
+            while (1) {
+              std::pair<Relocation *, uint64_t> nextTargetAndAddend =
+                  getRelocBranchInfo(*targetAndAddend.first, targetAndAddend.second);
+              if (!nextTargetAndAddend.first)
+                break;
+              targetAndAddend = nextTargetAndAddend;
+            }
+            mergeControlTransferRelocations(r, *targetAndAddend.first);
+          }
+        }
+      }
+    }
+  }
+}
+
+} // namespace elf
+} // namespace lld
+
+#endif
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 0c4fd00cab65c..0a4578b0aca4b 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -11,6 +11,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
+#include "TargetImpl.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Support/Endian.h"
@@ -50,6 +51,7 @@ class X86_64 : public TargetInfo {
   bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
                              InputSection *nextIS) const override;
   bool relaxOnce(int pass) const override;
+  void applyBranchToBranchOpt() const override;
 
 private:
   void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
@@ -1162,6 +1164,58 @@ void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
   }
 }
 
+static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
+                                                        Relocation &r) {
+  // Identify a control transfer relocation for the branch-to-branch
+  // optimization. A "control transfer relocation" usually means a CALL or JMP
+  // target but it also includes relative vtable relocations for example.
+  //
+  // We require the relocation type to be PLT32. With a relocation type of PLT32
+  // the value may be assumed to be used for branching directly to the symbol
+  // and the addend is only used to produce the relocated value (hence the
+  // effective addend is always 0). This is because if a PLT is needed the
+  // addend will be added to the address of the PLT, and it doesn't make sense
+  // to branch into the middle of a PLT. For example, relative vtable
+  // relocations use PLT32 and 0 or a positive value as the addend but still are
+  // used to branch to the symbol.
+  if (r.type == R_X86_64_PLT32)
+    return 0;
+  return std::nullopt;
+}
+
+static std::pair<Relocation *, uint64_t> getBranchInfo(InputSection &is,
+                                                       uint64_t offset) {
+  auto content = is.contentMaybeDecompress();
+  if (content.size() > offset && content[offset] == 0xe9) { // JMP immediate
+    auto *i = std::lower_bound(
+        is.relocations.begin(), is.relocations.end(), offset + 1,
+        [](Relocation &r, uint64_t offset) { return r.offset < offset; });
+    // Unlike with getControlTransferAddend() it is valid to accept a PC32
+    // relocation here because we know that this is actually a JMP and not some
+    // other reference, so the interpretation is that we add 4 to the addend and
+    // use that as the effective addend.
+    if (i != is.relocations.end() && i->offset == offset + 1 &&
+        (i->type == R_X86_64_PC32 || i->type == R_X86_64_PLT32)) {
+      return {i, i->addend + 4};
+    }
+  }
+  return {nullptr, 0};
+}
+
+static void mergeControlTransferRelocations(Relocation &r1,
+                                            const Relocation &r2) {
+  r1.expr = r2.expr;
+  r1.sym = r2.sym;
+  // The +4 is here to compensate for r2.addend which will likely be -4,
+  // but may also be addend-4 in case of a PC32 branch to symbol+addend.
+  r1.addend += r2.addend + 4;
+}
+
+void X86_64::applyBranchToBranchOpt() const {
+  applyBranchToBranchOptImpl(ctx, getBranchInfo, getControlTransferAddend,
+                             mergeControlTransferRelocations);
+}
+
 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
 // entries containing endbr64 instructions. A PLT entry will be split into two
 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index f0e9592d85dd6..b7449b9d13cf5 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -276,6 +276,7 @@ struct Config {
   bool bpFunctionOrderForCompression = false;
   bool bpDataOrderForCompression = false;
   bool bpVerboseSectionOrderer = false;
+  bool branchToBranch = false;
   bool checkSections;
   bool checkDynamicRelocs;
   std::optional<llvm::DebugCompressionType> compressDebugSections;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 9d36071e1532f..e79372957e408 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1589,6 +1589,8 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
   ctx.arg.zWxneeded = hasZOption(args, "wxneeded");
   setUnresolvedSymbolPolicy(ctx, args);
   ctx.arg.power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no";
+  ctx.arg.branchToBranch = args.hasFlag(
+      OPT_branch_to_branch, OPT_no_branch_to_branch, ctx.arg.optimize >= 2);
 
   if (opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) {
     if (arg->getOption().matches(OPT_eb))
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 76d28096f82c8..40fc0d2c8c64e 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -59,6 +59,10 @@ def build_id: J<"build-id=">, HelpText<"Generate build ID note">,
   MetaVarName<"[fast,md5,sha1,uuid,0x<hexstring>]">;
 def : F<"build-id">, Alias<build_id>, AliasArgs<["sha1"]>, HelpText<"Alias for --build-id=sha1">;
 
+defm branch_to_branch: B<"branch-to-branch",
+    "Enable branch-to-branch optimization (default at -O2)",
+    "Disable branch-to-branch optimization (default at -O0 and -O1)">;
+
 defm check_sections: B<"check-sections",
     "Check section addresses for overlaps (default)",
     "Do not check section addresses for overlaps">;
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 277acb26987bc..457fd19da5493 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1671,9 +1671,10 @@ void RelocationScanner::scan(Relocs<RelTy> rels) {
   }
 
   // Sort relocations by offset for more efficient searching for
-  // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64.
+  // R_RISCV_PCREL_HI20, R_PPC64_ADDR64 and the branch-to-branch optimization.
   if (ctx.arg.emachine == EM_RISCV ||
-      (ctx.arg.emachine == EM_PPC64 && sec->name == ".toc"))
+      (ctx.arg.emachine == EM_PPC64 && sec->name == ".toc") ||
+      ctx.arg.branchToBranch)
     llvm::stable_sort(sec->relocs(),
                       [](const Relocation &lhs, const Relocation &rhs) {
                         return lhs.offset < rhs.offset;
@@ -1964,6 +1965,9 @@ void elf::postScanRelocations(Ctx &ctx) {
   for (ELFFileBase *file : ctx.objectFiles)
     for (Symbol *sym : file->getLocalSymbols())
       fn(*sym);
+
+  if (ctx.arg.branchToBranch)
+    ctx.target->applyBranchToBranchOpt();
 }
 
 static bool mergeCmp(const InputSection *a, const InputSection *b) {
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index fd1e5d33c438a..6dd20b2f0cbaa 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -101,6 +101,7 @@ class TargetInfo {
 
   virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
                                  JumpModType val) const {}
+  virtual void applyBranchToBranchOpt() const {}
 
   virtual ~TargetInfo();
 
diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index 7b2650637cb10..d7b987ded784d 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -93,6 +93,10 @@ Bind default visibility defined STB_GLOBAL function symbols locally for
 .Fl shared.
 .It Fl --be8
 Write a Big Endian ELF File using BE8 format(AArch32 only)
+.It Fl -branch-to-branch
+Enable the branch-to-branch optimization: a branch whose target is
+another branch instruction is rewritten to point to the latter branch's
+target (AArch64 and X86_64 only). Enabled by default at -O2.
 .It Fl -build-id Ns = Ns Ar value
 Generate a build ID note.
 .Ar value
@@ -414,7 +418,7 @@ If not specified,
 .Dv a.out
 is used as a default.
 .It Fl O Ns Ar value
-Optimize output file size.
+Optimize output file.
 .Ar value
 may be:
 .Pp
@@ -424,7 +428,7 @@ Disable string merging.
 .It Cm 1
 Enable string merging.
 .It Cm 2
-Enable string tail merging.
+Enable string tail merging and branch-to-branch optimization.
 .El
 .Pp
 .Fl O Ns Cm 1
diff --git a/lld/test/ELF/aarch64-branch-to-branch.s b/lld/test/ELF/aarch64-branch-to-branch.s
new file mode 100644
index 0000000000000..3a3ae04ac0538
--- /dev/null
+++ b/lld/test/ELF/aarch64-branch-to-branch.s
@@ -0,0 +1,58 @@
+# REQUIRES: aarch64
+
+## Test that the branch-to-branch optimization follows the links
+## from f1 -> f2 -> f3 and updates all references to point to f3.
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux %s -o %t.o
+# RUN: ld.lld %t.o -o %t --branch-to-branch
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s
+# RUN: ld.lld %t.o -o %t -O2
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s
+
+## Test that branch-to-branch is disabled by default.
+
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+# RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+
+## Test that branch-to-branch is disabled for preemptible symbols.
+
+# RUN: ld.lld %t.o -o %t --branch-to-branch -shared
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+
+.section .rodata.vtable,"a"
+.globl vtable
+vtable:
+# B2B: Contents of section .rodata:
+# B2B-NEXT: [[VF:[0-9a-f]{8}]]
+.4byte f1@PLT - vtable
+# B2B-SAME: [[VF]]
+.4byte f2@PLT - vtable
+# B2B-SAME: [[VF]]
+.4byte f3@PLT - vtable
+
+.section .text._start,"ax"
+.globl _start
+_start:
+# B2B: bl {{.*}} <f3>
+# NOB2B: bl {{.*}} <f1{{.*}}>
+bl f1
+# B2B: b {{.*}} <f3>
+# NOB2B: b {{.*}} <f2{{.*}}>
+b f2
+
+.section .text.f1,"ax"
+.globl f1
+f1:
+b f2
+
+.section .text.f2,"ax"
+.globl f2
+f2:
+b f3
+
+.section .text.f3,"ax"
+.globl f3
+f3:
+ret
diff --git a/lld/test/ELF/x86-64-branch-to-branch.s b/lld/test/ELF/x86-64-branch-to-branch.s
new file mode 100644
index 0000000000000..b9c9abe2eb752
--- /dev/null
+++ b/lld/test/ELF/x86-64-branch-to-branch.s
@@ -0,0 +1,58 @@
+# REQUIRES: x86
+
+## Test that the branch-to-branch optimization follows the links
+## from f1 -> f2 -> f3 and updates all references to point to f3.
+ 
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+# RUN: ld.lld %t.o -o %t --branch-to-branch
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s
+# RUN: ld.lld %t.o -o %t -O2
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s
+
+## Test that branch-to-branch is disabled by default.
+
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+# RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+
+## Test that branch-to-branch is disabled for preemptible symbols.
+
+# RUN: ld.lld %t.o -o %t --branch-to-branch -shared
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+
+.section .rodata.vtable,"a"
+.globl vtable
+vtable:
+# B2B: Contents of section .rodata:
+# B2B-NEXT: [[VF:[0-9a-f]{8}]]
+.4byte f1@PLT - vtable
+# B2B-SAME: [[VF]]
+.4byte f2@PLT - vtable
+# B2B-SAME: [[VF]]
+.4byte f3@PLT - vtable
+
+.section .text._start,"ax"
+.globl _start
+_start:
+# B2B: jmp {{.*}} <f3>
+# NOB2B: jmp {{.*}} <f1{{.*}}>
+jmp f1
+# B2B: jmp {{.*}} <f3>
+# NOB2B: jmp {{.*}} <f2{{.*}}>
+jmp f2
+
+.section .text.f1,"ax"
+.globl f1
+f1:
+jmp f2
+
+.section .text.f2,"ax"
+.globl f2
+f2:
+jmp f3
+
+.section .text.f3,"ax"
+.globl f3
+f3:
+ret

>From bcebed6353f63c980cd4d05ba9726f88193deca4 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter at pcc.me.uk>
Date: Fri, 2 May 2025 17:06:37 -0700
Subject: [PATCH 2/5] Formatting, fix warning

Created using spr 1.3.6-beta.1
---
 lld/ELF/Arch/TargetImpl.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lld/ELF/Arch/TargetImpl.h b/lld/ELF/Arch/TargetImpl.h
index bb10749516953..757c0e2c0c51b 100644
--- a/lld/ELF/Arch/TargetImpl.h
+++ b/lld/ELF/Arch/TargetImpl.h
@@ -43,8 +43,8 @@ inline void applyBranchToBranchOptImpl(
   // reading relocations of other sections.
   for (ELFFileBase *f : ctx.objectFiles) {
     auto getRelocBranchInfo =
-        [&ctx, &getBranchInfo](Relocation &r,
-               uint64_t addend) -> std::pair<Relocation *, uint64_t> {
+        [&getBranchInfo](Relocation &r,
+                         uint64_t addend) -> std::pair<Relocation *, uint64_t> {
       auto *target = dyn_cast_or_null<Defined>(r.sym);
       // We don't allow preemptible symbols (may go somewhere else),
       // absolute symbols (runtime behavior unknown), non-executable memory
@@ -54,21 +54,21 @@ inline void applyBranchToBranchOptImpl(
           target->section->kind() != SectionBase::Regular)
         return {nullptr, 0};
       return getBranchInfo(*cast<InputSection>(target->section),
-                                       target->value + addend);
+                           target->value + addend);
     };
     for (InputSectionBase *s : f->getSections()) {
       if (!s)
         continue;
       for (Relocation &r : s->relocations) {
         if (std::optional<uint64_t> addend =
-                getControlTransferAddend(*cast<InputSection>(s),
-                                                     r)) {
+                getControlTransferAddend(*cast<InputSection>(s), r)) {
           std::pair<Relocation *, uint64_t> targetAndAddend =
               getRelocBranchInfo(r, *addend);
           if (targetAndAddend.first) {
             while (1) {
               std::pair<Relocation *, uint64_t> nextTargetAndAddend =
-                  getRelocBranchInfo(*targetAndAddend.first, targetAndAddend.second);
+                  getRelocBranchInfo(*targetAndAddend.first,
+                                     targetAndAddend.second);
               if (!nextTargetAndAddend.first)
                 break;
               targetAndAddend = nextTargetAndAddend;

>From 815550728987ac7338db486a11008bb8ed7a9eba Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter at pcc.me.uk>
Date: Mon, 5 May 2025 15:02:20 -0700
Subject: [PATCH 3/5] Address comments

Created using spr 1.3.6-beta.1
---
 lld/ELF/Options.td                      |  2 +-
 lld/test/ELF/aarch64-branch-to-branch.s | 13 ++++++++-----
 lld/test/ELF/x86-64-branch-to-branch.s  | 21 ++++++++++++---------
 3 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 40fc0d2c8c64e..2ce9e07dc6f2b 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -59,7 +59,7 @@ def build_id: J<"build-id=">, HelpText<"Generate build ID note">,
   MetaVarName<"[fast,md5,sha1,uuid,0x<hexstring>]">;
 def : F<"build-id">, Alias<build_id>, AliasArgs<["sha1"]>, HelpText<"Alias for --build-id=sha1">;
 
-defm branch_to_branch: B<"branch-to-branch",
+defm branch_to_branch: BB<"branch-to-branch",
     "Enable branch-to-branch optimization (default at -O2)",
     "Disable branch-to-branch optimization (default at -O0 and -O1)">;
 
diff --git a/lld/test/ELF/aarch64-branch-to-branch.s b/lld/test/ELF/aarch64-branch-to-branch.s
index 3a3ae04ac0538..06c899fd4e6b7 100644
--- a/lld/test/ELF/aarch64-branch-to-branch.s
+++ b/lld/test/ELF/aarch64-branch-to-branch.s
@@ -5,21 +5,21 @@
 
 # RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux %s -o %t.o
 # RUN: ld.lld %t.o -o %t --branch-to-branch
-# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s
 # RUN: ld.lld %t.o -o %t -O2
-# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s
 
 ## Test that branch-to-branch is disabled by default.
 
 # RUN: ld.lld %t.o -o %t
-# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
 # RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch
-# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
 
 ## Test that branch-to-branch is disabled for preemptible symbols.
 
 # RUN: ld.lld %t.o -o %t --branch-to-branch -shared
-# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
 
 .section .rodata.vtable,"a"
 .globl vtable
@@ -34,6 +34,7 @@ vtable:
 
 .section .text._start,"ax"
 .globl _start
+# CHECK: <_start>:
 _start:
 # B2B: bl {{.*}} <f3>
 # NOB2B: bl {{.*}} <f1{{.*}}>
@@ -49,7 +50,9 @@ b f2
 
 .section .text.f2,"ax"
 .globl f2
+# CHECK: <f2>:
 f2:
+# CHECK-NEXT: b {{.*}} <f3{{.*}}>
 b f3
 
 .section .text.f3,"ax"
diff --git a/lld/test/ELF/x86-64-branch-to-branch.s b/lld/test/ELF/x86-64-branch-to-branch.s
index b9c9abe2eb752..0c9e903438f8e 100644
--- a/lld/test/ELF/x86-64-branch-to-branch.s
+++ b/lld/test/ELF/x86-64-branch-to-branch.s
@@ -5,21 +5,21 @@
  
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 # RUN: ld.lld %t.o -o %t --branch-to-branch
-# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s
 # RUN: ld.lld %t.o -o %t -O2
-# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s
 
 ## Test that branch-to-branch is disabled by default.
 
 # RUN: ld.lld %t.o -o %t
-# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
 # RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch
-# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
 
 ## Test that branch-to-branch is disabled for preemptible symbols.
 
 # RUN: ld.lld %t.o -o %t --branch-to-branch -shared
-# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
+# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s
 
 .section .rodata.vtable,"a"
 .globl vtable
@@ -34,12 +34,13 @@ vtable:
 
 .section .text._start,"ax"
 .globl _start
+# CHECK: <_start>:
 _start:
-# B2B: jmp {{.*}} <f3>
-# NOB2B: jmp {{.*}} <f1{{.*}}>
+# B2B-NEXT: jmp {{.*}} <f3>
+# NOB2B-NEXT: jmp {{.*}} <f1{{.*}}>
 jmp f1
-# B2B: jmp {{.*}} <f3>
-# NOB2B: jmp {{.*}} <f2{{.*}}>
+# B2B-NEXT: jmp {{.*}} <f3>
+# NOB2B-NEXT: jmp {{.*}} <f2{{.*}}>
 jmp f2
 
 .section .text.f1,"ax"
@@ -49,7 +50,9 @@ jmp f2
 
 .section .text.f2,"ax"
 .globl f2
+# CHECK: <f2>:
 f2:
+# CHECK-NEXT: jmp {{.*}} <f3{{.*}}>
 jmp f3
 
 .section .text.f3,"ax"

>From c2d4697dff86e36b307d8d489c34f4e7f156acab Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter at pcc.me.uk>
Date: Thu, 8 May 2025 21:44:30 -0700
Subject: [PATCH 4/5] Fix bugs found during testing

Created using spr 1.3.6-beta.1
---
 lld/ELF/Arch/TargetImpl.h              | 11 ++++++++---
 lld/ELF/Arch/X86_64.cpp                | 22 ++++++++++++++++++----
 lld/test/ELF/x86-64-branch-to-branch.s | 12 ++++++++++++
 3 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/lld/ELF/Arch/TargetImpl.h b/lld/ELF/Arch/TargetImpl.h
index 757c0e2c0c51b..fffb6ca927319 100644
--- a/lld/ELF/Arch/TargetImpl.h
+++ b/lld/ELF/Arch/TargetImpl.h
@@ -46,10 +46,11 @@ inline void applyBranchToBranchOptImpl(
         [&getBranchInfo](Relocation &r,
                          uint64_t addend) -> std::pair<Relocation *, uint64_t> {
       auto *target = dyn_cast_or_null<Defined>(r.sym);
-      // We don't allow preemptible symbols (may go somewhere else),
+      // We don't allow preemptible symbols or ifuncs (may go somewhere else),
       // absolute symbols (runtime behavior unknown), non-executable memory
       // (ditto) or non-regular sections (no section data).
-      if (!target || target->isPreemptible || !target->section ||
+      if (!target || target->isPreemptible || target->isGnuIFunc() ||
+          !target->section ||
           !(target->section->flags & llvm::ELF::SHF_EXECINSTR) ||
           target->section->kind() != SectionBase::Regular)
         return {nullptr, 0};
@@ -65,7 +66,11 @@ inline void applyBranchToBranchOptImpl(
           std::pair<Relocation *, uint64_t> targetAndAddend =
               getRelocBranchInfo(r, *addend);
           if (targetAndAddend.first) {
-            while (1) {
+            // Avoid getting stuck in an infinite loop if we encounter a branch
+            // that (possibly indirectly) branches to itself. It is unlikely
+            // that more than 5 iterations will ever be needed in practice.
+            size_t iterations = 5;
+            while (iterations--) {
               std::pair<Relocation *, uint64_t> nextTargetAndAddend =
                   getRelocBranchInfo(*targetAndAddend.first,
                                      targetAndAddend.second);
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 0a4578b0aca4b..fb1410c1b84e8 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -1178,8 +1178,15 @@ static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
   // to branch into the middle of a PLT. For example, relative vtable
   // relocations use PLT32 and 0 or a positive value as the addend but still are
   // used to branch to the symbol.
-  if (r.type == R_X86_64_PLT32)
+  //
+  // STT_SECTION symbols are a special case on x86 because the LLVM assembler
+  // uses them for branches to local symbols which are assembled as referring to
+  // the section symbol with the addend equal to the symbol value - 4.
+  if (r.type == R_X86_64_PLT32) {
+    if (r.sym->isSection())
+      return r.addend + 4;
     return 0;
+  }
   return std::nullopt;
 }
 
@@ -1204,11 +1211,18 @@ static std::pair<Relocation *, uint64_t> getBranchInfo(InputSection &is,
 
 static void mergeControlTransferRelocations(Relocation &r1,
                                             const Relocation &r2) {
-  r1.expr = r2.expr;
-  r1.sym = r2.sym;
+  // The isSection() check handles the STT_SECTION case described above.
+  // In that case the original addend is irrelevant because it referred to an
+  // offset within the original target section so we overwrite it.
+  //
   // The +4 is here to compensate for r2.addend which will likely be -4,
   // but may also be addend-4 in case of a PC32 branch to symbol+addend.
-  r1.addend += r2.addend + 4;
+  if (r1.sym->isSection())
+    r1.addend = r2.addend;
+  else
+    r1.addend += r2.addend + 4;
+  r1.expr = r2.expr;
+  r1.sym = r2.sym;
 }
 
 void X86_64::applyBranchToBranchOpt() const {
diff --git a/lld/test/ELF/x86-64-branch-to-branch.s b/lld/test/ELF/x86-64-branch-to-branch.s
index 0c9e903438f8e..52da61ef8f5af 100644
--- a/lld/test/ELF/x86-64-branch-to-branch.s
+++ b/lld/test/ELF/x86-64-branch-to-branch.s
@@ -42,6 +42,12 @@ jmp f1
 # B2B-NEXT: jmp {{.*}} <f3>
 # NOB2B-NEXT: jmp {{.*}} <f2{{.*}}>
 jmp f2
+# This will assemble to a relocation pointing to an STT_SECTION for .text.f4
+# with an addend, which looks similar to the relative vtable cases above but
+# requires different handling of the addend so that we don't think this is
+# branching to the `jmp f3` at the start of the target section.
+# CHECK-NEXT: jmp {{.*}} <f4{{.*}}>
+jmp f4
 
 .section .text.f1,"ax"
 .globl f1
@@ -58,4 +64,10 @@ jmp f3
 .section .text.f3,"ax"
 .globl f3
 f3:
+# Test that a self-branch doesn't trigger an infinite loop.
+jmp f3
+
+.section .text.f4,"ax"
+jmp f3
+f4:
 ret

>From 8dc8490ba0b5422184f0b8163d7c5a5f02606fc1 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter at pcc.me.uk>
Date: Thu, 8 May 2025 21:59:05 -0700
Subject: [PATCH 5/5] Add test for ifunc case

Created using spr 1.3.6-beta.1
---
 lld/test/ELF/x86-64-branch-to-branch.s | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/lld/test/ELF/x86-64-branch-to-branch.s b/lld/test/ELF/x86-64-branch-to-branch.s
index 52da61ef8f5af..c127d49ba9833 100644
--- a/lld/test/ELF/x86-64-branch-to-branch.s
+++ b/lld/test/ELF/x86-64-branch-to-branch.s
@@ -48,6 +48,8 @@ jmp f2
 # branching to the `jmp f3` at the start of the target section.
 # CHECK-NEXT: jmp {{.*}} <f4{{.*}}>
 jmp f4
+# B2B-NEXT: jmp 0x[[IPLT:[0-9a-f]*]]
+jmp f5
 
 .section .text.f1,"ax"
 .globl f1
@@ -71,3 +73,12 @@ jmp f3
 jmp f3
 f4:
 ret
+
+.section .text.f5,"ax"
+.type f5, @gnu_indirect_function
+.globl f5
+f5:
+jmp f3
+
+# B2B: <.iplt>:
+# B2B-NEXT: [[IPLT]]:



More information about the llvm-commits mailing list