[lld] [lld][Hexagon] Redirect undefined weak branches to guard section (PR #186613)

via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 14 11:40:51 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld-elf

Author: Brian Cain (androm3da)

<details>
<summary>Changes</summary>

Hexagon undefined weak branch relocations previously resolved to the start of the current packet (zero offset from PC).  This was incorrect for multi-word packets because it produced mid-packet branch targets.

Replace that approach with a linker-synthesized guard function `__linker_guard_weak_undef` containing `{ jumpr r31 }`.  During relocation scanning, all branch relocations targeting undefined weak symbols are redirected to this guard.  Calls become no-ops (the guard returns immediately) and jumps perform an early return.  This is safe for all packet shapes and matches the behavior of the eld linker.

The guard section is included in the output only when at least one branch to an undefined weak symbol is encountered (gated by `isNeeded()`), so it has no impact on the layout of binaries that contain no such branches.

---
Full diff: https://github.com/llvm/llvm-project/pull/186613.diff


4 Files Affected:

- (modified) lld/ELF/Arch/Hexagon.cpp (+66-6) 
- (modified) lld/ELF/Config.h (+2) 
- (modified) lld/ELF/Writer.cpp (+6) 
- (added) lld/test/ELF/hexagon-undefined-weak-thunk.s (+59) 


``````````diff
diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index 4204e2f4e524a..1dd521838dd2e 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -38,6 +38,7 @@ class Hexagon final : public TargetInfo {
                      const uint8_t *loc) const override;
   RelType getDynRel(RelType type) const override;
   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
+  void initTargetSpecificSections() override;
   template <class ELFT, class RelTy>
   void scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels);
   void scanSection(InputSectionBase &sec) override {
@@ -78,6 +79,41 @@ Hexagon::Hexagon(Ctx &ctx) : TargetInfo(ctx) {
   needsThunks = true;
 }
 
+namespace {
+class HexagonGuardSection final : public SyntheticSection {
+public:
+  std::atomic<bool> isUsed{false};
+  HexagonGuardSection(Ctx &ctx)
+      : SyntheticSection(ctx, ".text.guard", SHT_PROGBITS,
+                         SHF_ALLOC | SHF_EXECINSTR, 4) {}
+  size_t getSize() const override { return 4; }
+  bool isNeeded() const override {
+    return isUsed.load(std::memory_order_relaxed);
+  }
+  void finalizeContents() override {
+    if (isNeeded() && ctx.in.symTab)
+      ctx.in.symTab->addSymbol(ctx.in.hexagonGuardSym);
+  }
+  void writeTo(uint8_t *buf) override {
+    // { jumpr r31 }
+    write32le(buf, 0x529fc000);
+  }
+};
+} // namespace
+
+void Hexagon::initTargetSpecificSections() {
+  ctx.in.hexagonGuard = std::make_unique<HexagonGuardSection>(ctx);
+  ctx.inputSections.push_back(ctx.in.hexagonGuard.get());
+  // Create the guard symbol but do not add it to the symtab yet.
+  // HexagonGuardSection::finalizeContents() adds it only if the section is
+  // actually referenced, avoiding a dangling symtab entry when the section
+  // is removed by removeUnusedSyntheticSections().
+  ctx.in.hexagonGuardSym =
+      makeDefined(ctx, /*file=*/nullptr, "__linker_guard_weak_undef", STB_LOCAL,
+                  STV_DEFAULT, STT_FUNC, /*value=*/0, /*size=*/4,
+                  ctx.in.hexagonGuard.get());
+}
+
 uint32_t Hexagon::calcEFlags() const {
   // The architecture revision must always be equal to or greater than
   // greatest revision in the list of inputs.
@@ -161,24 +197,44 @@ void Hexagon::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
       expr = R_ABS;
       break;
 
-    // PC-relative relocations:
+    // PC-relative branch relocations — redirect undefined weak symbols
+    // to the guard section so branches land on a safe { jumpr r31 } stub
+    // instead of a potentially mid-packet target.
     case R_HEX_B9_PCREL:
     case R_HEX_B13_PCREL:
-    case R_HEX_B15_PCREL:
+    case R_HEX_B15_PCREL: {
+      Symbol *target = &sym;
+      if (sym.isUndefWeak() && ctx.in.hexagonGuardSym) {
+        target = ctx.in.hexagonGuardSym;
+        static_cast<HexagonGuardSection *>(ctx.in.hexagonGuard.get())
+            ->isUsed.store(true, std::memory_order_relaxed);
+      }
+      rs.processR_PC(type, offset, addend, *target);
+      continue;
+    }
+
+    // Non-branch PC-relative relocations (no redirect needed):
     case R_HEX_6_PCREL_X:
     case R_HEX_32_PCREL:
       rs.processR_PC(type, offset, addend, sym);
       continue;
 
-    // PLT-generating relocations:
+    // PLT-generating branch relocations — same guard redirect:
     case R_HEX_B9_PCREL_X:
     case R_HEX_B15_PCREL_X:
     case R_HEX_B22_PCREL:
     case R_HEX_PLT_B22_PCREL:
     case R_HEX_B22_PCREL_X:
-    case R_HEX_B32_PCREL_X:
-      rs.processR_PLT_PC(type, offset, addend, sym);
+    case R_HEX_B32_PCREL_X: {
+      Symbol *target = &sym;
+      if (sym.isUndefWeak() && ctx.in.hexagonGuardSym) {
+        target = ctx.in.hexagonGuardSym;
+        static_cast<HexagonGuardSection *>(ctx.in.hexagonGuard.get())
+            ->isUsed.store(true, std::memory_order_relaxed);
+      }
+      rs.processR_PLT_PC(type, offset, addend, *target);
       continue;
+    }
     case R_HEX_GD_PLT_B22_PCREL:
     case R_HEX_GD_PLT_B22_PCREL_X:
     case R_HEX_GD_PLT_B32_PCREL_X:
@@ -370,7 +426,11 @@ bool Hexagon::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
 bool Hexagon::needsThunk(RelExpr expr, RelType type, const InputFile *file,
                          uint64_t branchAddr, const Symbol &s,
                          int64_t a) const {
-  // Only check branch range for supported branch relocation types
+  // Undefined weak symbols without PLT entries are redirected to the
+  // guard section during relocation scanning, so this check should not
+  // normally be reached for branch relocs.  It remains as a safety net.
+  if (s.isUndefined() && !s.isInPlt(ctx))
+    return false;
   switch (type) {
   case R_HEX_B22_PCREL:
   case R_HEX_PLT_B22_PCREL:
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index a9f74460f6f99..5ba99d4d66789 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -563,6 +563,8 @@ struct UndefinedDiag {
 struct InStruct {
   std::unique_ptr<InputSection> attributes;
   std::unique_ptr<SyntheticSection> hexagonAttributes;
+  std::unique_ptr<SyntheticSection> hexagonGuard;
+  Defined *hexagonGuardSym = nullptr;
   std::unique_ptr<SyntheticSection> riscvAttributes;
   std::unique_ptr<BssSection> bss;
   std::unique_ptr<BssSection> bssRelRo;
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index c84e4df767452..6353d823d8864 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -2011,6 +2011,12 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
     ctx.in.mipsGot->build();
 
   removeUnusedSyntheticSections(ctx);
+
+  // If the Hexagon guard section survived removal (i.e. it is needed),
+  // add its local symbol to the symbol table now, before any finalization
+  // that bakes section sizes into the layout.
+  finalizeSynthetic(ctx, ctx.in.hexagonGuard.get());
+
   ctx.script->diagnoseOrphanHandling();
   ctx.script->diagnoseMissingSGSectionAddress();
 
diff --git a/lld/test/ELF/hexagon-undefined-weak-thunk.s b/lld/test/ELF/hexagon-undefined-weak-thunk.s
new file mode 100644
index 0000000000000..d289335c33bfd
--- /dev/null
+++ b/lld/test/ELF/hexagon-undefined-weak-thunk.s
@@ -0,0 +1,59 @@
+# REQUIRES: hexagon
+# RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %s -o %t.o
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
+
+## Undefined weak branch targets are redirected to a linker-synthesized
+## guard function containing { jumpr r31 }.  This is safe for all packet
+## shapes because the guard is always a valid branch target (packet-
+## aligned, single-word packet).  Calls become no-ops (guard returns
+## immediately), jumps do an early return.
+
+.weak undefined_weak
+.globl _start
+.type _start, @function
+_start:
+  ## Simple call — single-word packet.
+  call undefined_weak
+
+  ## Call in a two-word packet with an ALU op.
+  { r0 = #0
+    call undefined_weak }
+
+  ## Conditional call — single-word packet.
+  { if (p0) call #undefined_weak }
+
+  ## Jump in a two-word packet.
+  { r0 = #0; jump #undefined_weak }
+
+  ## Two conditional calls plus an ALU op — three-word packet.
+  { r2 = add(r0, r1)
+    if (p0) call #undefined_weak
+    if (!p0) call #undefined_weak }
+
+  ## Conditional jump with ALU op — two-word packet.
+  { r2 = add(r0, r1)
+    if (r0 == #0) jump:t #undefined_weak }
+
+  jumpr r31
+
+## All branches in _start target the guard function.
+# CHECK:      <_start>:
+# CHECK-NEXT:   {{[0-9a-f]+}}: { call 0x[[#%x,GUARD:]] <__linker_guard_weak_undef> }
+# CHECK-NEXT:   {{[0-9a-f]+}}: { call 0x[[#GUARD]] <__linker_guard_weak_undef>
+# CHECK-NEXT:            r0 = #0x0 }
+# CHECK-NEXT:   {{[0-9a-f]+}}: { if (p0) call 0x[[#GUARD]] <__linker_guard_weak_undef> }
+# CHECK-NEXT:   {{[0-9a-f]+}}: { r0 = #0x0 ; jump 0x[[#GUARD]] <__linker_guard_weak_undef> }
+# CHECK-NEXT:   {{[0-9a-f]+}}: { if (p0) call 0x[[#GUARD]] <__linker_guard_weak_undef>
+# CHECK-NEXT:            if (!p0) call 0x[[#GUARD]] <__linker_guard_weak_undef>
+# CHECK-NEXT:            r2 = add(r0,r1) }
+# CHECK-NEXT:   {{[0-9a-f]+}}: { if (r0==#0) jump:t 0x[[#GUARD]]
+# CHECK-NEXT:            r2 = add(r0,r1) }
+# CHECK-NEXT:          { jumpr r31 }
+
+## The guard section contains a single { jumpr r31 } packet.
+# CHECK:      <__linker_guard_weak_undef>:
+# CHECK-NEXT:   {{[0-9a-f]+}}: { jumpr r31 }
+
+## No thunks should be created.
+# CHECK-NOT: __hexagon_thunk

``````````

</details>


https://github.com/llvm/llvm-project/pull/186613


More information about the llvm-commits mailing list