[llvm] [BOLT] Fix handling of ambiguous jump table entries (PR #155291)
Maksim Panchenko via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 13:51:27 PDT 2025
https://github.com/maksfb created https://github.com/llvm/llvm-project/pull/155291
Jump tables may contain entries that point immediately past the end of their parent function. Normally, such entries are generated by the compiler as a result of a __builtin_unreachable() case. We used to replace those entries with a label belonging to their parent function, assuming the destination doesn't matter since reaching it is undefined behavior.
However, if such an entry is at the end of the jump table, it could be a real function pointer, not a jump table entry. We rely on heuristics to detect such cases and can drop the trailing function pointer entries from the table.
The problem arises when the "unreachable" ambiguous entry is followed by another ambiguous entry corresponding to the start of the parent function. In this case we accept the pointers as jump table entries and may incorrectly update the function pointer.
The solution is to keep ambiguous "unreachable" jump table entries identical to the original input, i.e. pointing to the same function. This change does not affect the CFG, but results in the entries being updated with the new function address if the function gets relocated.
>From 0d9f2f6b4b80459b784224215623f9c80aa3e9ac Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Thu, 14 Aug 2025 13:16:09 -0700
Subject: [PATCH] [BOLT] Fix handling of ambiguous jump table entries
Jump tables may contain entries that point immediately past the end of
their parent function. Normally, such entries are generated by the
compiler as a result of a __builtin_unreachable() case. We used to
replace those entries with a label belonging to their parent function,
assuming the destination doesn't matter since reaching it is undefined
behavior.
However, if such an entry is at the end of the jump table, it could be
a real function pointer, not a jump table entry. We rely on heuristics
to detect such cases and can drop the trailing function pointer
entries from the table.
The problem arises when the "unreachable" ambiguous entry is followed
by another ambiguous entry corresponding to the start of the parent
function. In this case we accept the pointers as jump table entries and
may incorrectly update the function pointer.
The solution is to keep ambiguous "unreachable" jump table entries
identical to the original input, i.e. pointing to the same function.
This change does not affect the CFG, but results in the entries being
updated with the new function address if the function gets relocated.
---
bolt/lib/Core/BinaryBasicBlock.cpp | 13 ++-
bolt/lib/Core/BinaryFunction.cpp | 4 +-
bolt/lib/Passes/IndirectCallPromotion.cpp | 5 +-
.../X86/jump-table-ambiguous-unreachable.s | 87 +++++++++++++++++++
4 files changed, 102 insertions(+), 7 deletions(-)
create mode 100644 bolt/test/X86/jump-table-ambiguous-unreachable.s
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp
index 311d5c15b8dca..eeab1ed4d7cff 100644
--- a/bolt/lib/Core/BinaryBasicBlock.cpp
+++ b/bolt/lib/Core/BinaryBasicBlock.cpp
@@ -103,9 +103,18 @@ bool BinaryBasicBlock::validateSuccessorInvariants() {
Valid &= (Sym == Function->getFunctionEndLabel() ||
Sym == Function->getFunctionEndLabel(getFragmentNum()));
if (!Valid) {
- BC.errs() << "BOLT-WARNING: Jump table contains illegal entry: "
- << Sym->getName() << "\n";
+ const BinaryFunction *TargetBF = BC.getFunctionForSymbol(Sym);
+ if (TargetBF) {
+ // It's possible for another function to be in the jump table entry
+ // as a result of built-in unreachable.
+ Valid = true;
+ } else {
+ BC.errs() << "BOLT-WARNING: Jump table contains illegal entry: "
+ << Sym->getName() << "\n";
+ }
}
+ if (!Valid)
+ break;
}
}
} else {
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 8f494f105fbba..c5c3247e01d04 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1959,7 +1959,9 @@ void BinaryFunction::postProcessJumpTables() {
return EntryAddress == Parent->getAddress() + Parent->getSize();
});
if (IsBuiltinUnreachable) {
- MCSymbol *Label = getOrCreateLocalLabel(EntryAddress, true);
+ BinaryFunction *TargetBF = BC.getBinaryFunctionAtAddress(EntryAddress);
+ MCSymbol *Label = TargetBF ? TargetBF->getSymbol()
+ : getOrCreateLocalLabel(EntryAddress, true);
JT.Entries.push_back(Label);
continue;
}
diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp
index 2b5a591f4c7a2..8a01cb974c5da 100644
--- a/bolt/lib/Passes/IndirectCallPromotion.cpp
+++ b/bolt/lib/Passes/IndirectCallPromotion.cpp
@@ -261,10 +261,7 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
for (size_t I = Range.first; I < Range.second; ++I, JI += JIAdj) {
MCSymbol *Entry = JT->Entries[I];
const BinaryBasicBlock *ToBB = BF.getBasicBlockForLabel(Entry);
- assert(ToBB || Entry == BF.getFunctionEndLabel() ||
- Entry == BF.getFunctionEndLabel(FragmentNum::cold()));
- if (Entry == BF.getFunctionEndLabel() ||
- Entry == BF.getFunctionEndLabel(FragmentNum::cold()))
+ if (!ToBB)
continue;
const Location To(Entry);
const BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(*ToBB);
diff --git a/bolt/test/X86/jump-table-ambiguous-unreachable.s b/bolt/test/X86/jump-table-ambiguous-unreachable.s
new file mode 100644
index 0000000000000..eb87b9604faf4
--- /dev/null
+++ b/bolt/test/X86/jump-table-ambiguous-unreachable.s
@@ -0,0 +1,87 @@
+## Check that llvm-bolt correctly updates ambiguous jump table entries that
+## can correspond to either builtin_unreachable() or could be a pointer to
+## the next function.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -no-pie -Wl,-q
+
+# RUN: llvm-bolt %t.exe --print-normalized --print-only=foo -o %t.out \
+# RUN: 2>&1 | FileCheck %s
+
+
+
+ .text
+ .globl _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ call foo
+ ret
+ .cfi_endproc
+ .size _start, .-_start
+
+ .globl foo
+ .type foo, %function
+foo:
+ .cfi_startproc
+.LBB00:
+ movq 0x8(%rdi), %rdi
+ movzbl 0x1(%rdi), %eax
+.LBB00_br:
+ jmpq *"JUMP_TABLE/foo.0"(,%rax,8)
+# CHECK: jmpq {{.*}} # JUMPTABLE
+# CHECK-NEXT: Successors: {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}
+
+.Ltmp87085:
+ xorl %eax, %eax
+ retq
+
+.Ltmp87086:
+ cmpb $0x0, 0x8(%rdi)
+ setne %al
+ retq
+
+.Ltmp87088:
+ movb $0x1, %al
+ retq
+
+.Ltmp87087:
+ movzbl 0x14(%rdi), %eax
+ andb $0x2, %al
+ shrb %al
+ retq
+
+ .cfi_endproc
+.size foo, .-foo
+
+ .globl bar
+ .type bar, %function
+bar:
+ .cfi_startproc
+ ret
+ .cfi_endproc
+ .size bar, .-bar
+
+# Jump tables
+.section .rodata
+ .global jump_table
+jump_table:
+"JUMP_TABLE/foo.0":
+ .quad bar
+ .quad .Ltmp87085
+ .quad bar
+ .quad .Ltmp87086
+ .quad .Ltmp87087
+ .quad .LBB00
+ .quad .Ltmp87088
+ .quad bar
+ .quad .LBB00
+
+# CHECK: Jump table {{.*}} for function foo
+# CHECK-NEXT: 0x{{.*}} : bar
+# CHECK-NEXT: 0x{{.*}} :
+# CHECK-NEXT: 0x{{.*}} : bar
+# CHECK-NEXT: 0x{{.*}} :
+# CHECK-NEXT: 0x{{.*}} :
More information about the llvm-commits
mailing list