[llvm] [BOLT][AArch64] Support FEAT_CMPBR branch instructions. (PR #174972)
Alexandros Lamprineas via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 4 07:14:57 PST 2026
https://github.com/labrinea updated https://github.com/llvm/llvm-project/pull/174972
>From 9ae12a45f96d2e2fac8439dfb838331492a41336 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Fri, 14 Nov 2025 11:43:47 +0000
Subject: [PATCH 01/13] [BOLT][AArch64] Support FEAT_CMPBR branch instructions.
The Armv9.6-A compare-and-branch instructions use a short-range 9-bit
immediate value. They do not have a corresponding relocation type in
the ABI, which means they can only be used either in the compact code
model or in non-relocation mode. This patch adds support for them in
BOLT.
---
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 155 ++++++++++++++++--
bolt/test/AArch64/compare-and-branch.S | 127 ++++++++++++++
2 files changed, 271 insertions(+), 11 deletions(-)
create mode 100644 bolt/test/AArch64/compare-and-branch.S
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 03fb4ddc2f238..1d9e4cbce118a 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -716,6 +716,51 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Insts;
}
+ bool isCompAndBranch(const MCInst &Inst) const {
+ const unsigned opcode = Inst.getOpcode();
+ switch (opcode) {
+ case AArch64::CBBEQWrr:
+ case AArch64::CBBGEWrr:
+ case AArch64::CBBGTWrr:
+ case AArch64::CBBHIWrr:
+ case AArch64::CBBHSWrr:
+ case AArch64::CBBNEWrr:
+ case AArch64::CBHEQWrr:
+ case AArch64::CBHGEWrr:
+ case AArch64::CBHGTWrr:
+ case AArch64::CBHHIWrr:
+ case AArch64::CBHHSWrr:
+ case AArch64::CBHNEWrr:
+ case AArch64::CBHIWrr:
+ case AArch64::CBHIXrr:
+ case AArch64::CBHSWrr:
+ case AArch64::CBHSXrr:
+ case AArch64::CBNEWrr:
+ case AArch64::CBNEXrr:
+ case AArch64::CBEQWrr:
+ case AArch64::CBEQXrr:
+ case AArch64::CBGEWrr:
+ case AArch64::CBGEXrr:
+ case AArch64::CBGTWrr:
+ case AArch64::CBGTXrr:
+ case AArch64::CBEQWri:
+ case AArch64::CBEQXri:
+ case AArch64::CBGTWri:
+ case AArch64::CBGTXri:
+ case AArch64::CBHIWri:
+ case AArch64::CBHIXri:
+ case AArch64::CBLOWri:
+ case AArch64::CBLOXri:
+ case AArch64::CBLTWri:
+ case AArch64::CBLTXri:
+ case AArch64::CBNEWri:
+ case AArch64::CBNEXri:
+ return true;
+ default:
+ return false;
+ }
+ }
+
bool isTB(const MCInst &Inst) const {
return (Inst.getOpcode() == AArch64::TBNZW ||
Inst.getOpcode() == AArch64::TBNZX ||
@@ -1260,7 +1305,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
if (isConditionalBranch(Inst) || isADR(Inst) || isADRP(Inst) ||
isMOVW(Inst))
OpNum = 1;
- if (isTB(Inst) || isAddXri(Inst))
+ if (isTB(Inst) || isAddXri(Inst) || isCompAndBranch(Inst))
OpNum = 2;
}
@@ -1329,7 +1374,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
++OI;
}
- if (isTB(Inst)) {
+ if (isTB(Inst) || isCompAndBranch(Inst)) {
assert(MCPlus::getNumPrimeOperands(Inst) >= 3 &&
"Invalid number of operands");
OI = Inst.begin() + 2;
@@ -1682,6 +1727,42 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
case AArch64::CBZX: return AArch64::CBNZX;
case AArch64::CBNZW: return AArch64::CBZW;
case AArch64::CBNZX: return AArch64::CBZX;
+ case AArch64::CBGTWrr: return AArch64::CBGEWrr;
+ case AArch64::CBGTXrr: return AArch64::CBGEXrr;
+ case AArch64::CBGEWrr: return AArch64::CBGTWrr;
+ case AArch64::CBGEXrr: return AArch64::CBGTXrr;
+ case AArch64::CBHIWrr: return AArch64::CBHSWrr;
+ case AArch64::CBHIXrr: return AArch64::CBHSXrr;
+ case AArch64::CBHSWrr: return AArch64::CBHIWrr;
+ case AArch64::CBHSXrr: return AArch64::CBHIXrr;
+ case AArch64::CBEQWrr: return AArch64::CBNEWrr;
+ case AArch64::CBEQXrr: return AArch64::CBNEXrr;
+ case AArch64::CBNEWrr: return AArch64::CBEQWrr;
+ case AArch64::CBNEXrr: return AArch64::CBEQXrr;
+ case AArch64::CBHGTWrr: return AArch64::CBHGEWrr;
+ case AArch64::CBHGEWrr: return AArch64::CBHGTWrr;
+ case AArch64::CBHHIWrr: return AArch64::CBHHSWrr;
+ case AArch64::CBHHSWrr: return AArch64::CBHHIWrr;
+ case AArch64::CBHEQWrr: return AArch64::CBHNEWrr;
+ case AArch64::CBHNEWrr: return AArch64::CBHEQWrr;
+ case AArch64::CBBGTWrr: return AArch64::CBBGEWrr;
+ case AArch64::CBBGEWrr: return AArch64::CBBGTWrr;
+ case AArch64::CBBHIWrr: return AArch64::CBBHSWrr;
+ case AArch64::CBBHSWrr: return AArch64::CBBHIWrr;
+ case AArch64::CBBEQWrr: return AArch64::CBBNEWrr;
+ case AArch64::CBBNEWrr: return AArch64::CBBEQWrr;
+ case AArch64::CBGTWri: return AArch64::CBLTWri;
+ case AArch64::CBGTXri: return AArch64::CBLTXri;
+ case AArch64::CBLTWri: return AArch64::CBGTWri;
+ case AArch64::CBLTXri: return AArch64::CBGTXri;
+ case AArch64::CBHIWri: return AArch64::CBLOWri;
+ case AArch64::CBHIXri: return AArch64::CBLOXri;
+ case AArch64::CBLOWri: return AArch64::CBHIWri;
+ case AArch64::CBLOXri: return AArch64::CBHIXri;
+ case AArch64::CBEQWri: return AArch64::CBNEWri;
+ case AArch64::CBEQXri: return AArch64::CBNEXri;
+ case AArch64::CBNEWri: return AArch64::CBEQWri;
+ case AArch64::CBNEXri: return AArch64::CBEQXri;
}
}
@@ -1702,9 +1783,63 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
}
+ bool needsRegSwap(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ return false;
+ case AArch64::CBGTWrr:
+ case AArch64::CBGTXrr:
+ case AArch64::CBGEWrr:
+ case AArch64::CBGEXrr:
+ case AArch64::CBHIWrr:
+ case AArch64::CBHIXrr:
+ case AArch64::CBHSWrr:
+ case AArch64::CBHSXrr:
+ case AArch64::CBHGTWrr:
+ case AArch64::CBHGEWrr:
+ case AArch64::CBHHIWrr:
+ case AArch64::CBHHSWrr:
+ case AArch64::CBBGTWrr:
+ case AArch64::CBBGEWrr:
+ case AArch64::CBBHIWrr:
+ case AArch64::CBBHSWrr:
+ return true;
+ }
+ }
+
+ bool needsImmDec(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ return false;
+ case AArch64::CBLTWri:
+ case AArch64::CBLTXri:
+ case AArch64::CBLOWri:
+ case AArch64::CBLOXri:
+ return true;
+ }
+ }
+
+ bool needsImmInc(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ return false;
+ case AArch64::CBGTWri:
+ case AArch64::CBGTXri:
+ case AArch64::CBHIWri:
+ case AArch64::CBHIXri:
+ return true;
+ }
+ }
+
void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
MCContext *Ctx) const override {
- if (isTB(Inst) || isCB(Inst)) {
+ if (isTB(Inst) || isCB(Inst) || isCompAndBranch(Inst)) {
+ if (needsRegSwap(Inst.getOpcode()))
+ std::swap(Inst.getOperand(0), Inst.getOperand(1));
+ else if (needsImmDec(Inst.getOpcode()))
+ Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() - 1);
+ else if (needsImmInc(Inst.getOpcode()))
+ Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() + 1);
Inst.setOpcode(getInvertedBranchOpcode(Inst.getOpcode()));
assert(Inst.getOpcode() != 0 && "Invalid branch instruction");
} else if (Inst.getOpcode() == AArch64::Bcc) {
@@ -1721,18 +1856,16 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
int getPCRelEncodingSize(const MCInst &Inst) const override {
+ if (isCompAndBranch(Inst))
+ return 11;
+ if (isTB(Inst))
+ return 16;
+ if (isCB(Inst))
+ return 21;
switch (Inst.getOpcode()) {
default:
llvm_unreachable("Failed to get pcrel encoding size");
return 0;
- case AArch64::TBZW: return 16;
- case AArch64::TBZX: return 16;
- case AArch64::TBNZW: return 16;
- case AArch64::TBNZX: return 16;
- case AArch64::CBZW: return 21;
- case AArch64::CBZX: return 21;
- case AArch64::CBNZW: return 21;
- case AArch64::CBNZX: return 21;
case AArch64::B: return 28;
case AArch64::BL: return 28;
case AArch64::Bcc: return 21;
diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
new file mode 100644
index 0000000000000..374541c9bd6a0
--- /dev/null
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -0,0 +1,127 @@
+# This test checks that splitting functions which contain short range
+# conditional branches works in compact code model without relying on
+# relocations. Also checks that splitting works in non-relocation mode,
+# in order to test the branch inversion on those instructions.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=1
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model
+# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-RELOCS
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELOC-MODE
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=0
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
+# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-NO-RELOCS
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NON-RELOC-MODE
+
+ .globl foo
+ .type foo, %function
+foo:
+# FDATA: 1 foo #.entry_foo# 10
+.entry_foo:
+## Test immediate increment when inverting the branch.
+ cbgt x0, #0, .Lcold_foo
+ mov x0, #1
+.Lcold_foo:
+ ret
+
+ .globl bar
+ .type bar, %function
+bar:
+# FDATA: 1 bar #.entry_bar# 10
+.entry_bar:
+## Test immediate decrement when inverting the branch.
+ cblo x0, #1, .Lcold_bar
+ mov x0, #2
+.Lcold_bar:
+ ret
+
+ .globl baz
+ .type baz, %function
+baz:
+# FDATA: 1 baz #.entry_baz# 10
+.entry_baz:
+## Test register swap when inverting the branch.
+ cbge x0, x1, .Lcold_baz
+ mov x0, #3
+.Lcold_baz:
+ ret
+
+## Force relocation mode.
+.if RELOCATION_MODE
+.reloc 0, R_AARCH64_NONE
+.endif
+
+
+# CHECK-RELOCS: R_AARCH64_NONE *ABS*
+# CHECK-RELOCS-NOT: R_AARCH64_
+
+
+# CHECK-NO-RELOCS-NOT: R_AARCH64_
+
+
+# RELOC-MODE: Disassembly of section .text:
+
+# RELOC-MODE: <foo>:
+# RELOC-MODE-NEXT: {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b 0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
+
+# RELOC-MODE: <bar>:
+# RELOC-MODE-NEXT: {{.*}} cblo x0, #0x1, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b 0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
+
+# RELOC-MODE: <baz>:
+# RELOC-MODE-NEXT: {{.*}} cbge x0, x1, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b 0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
+
+# RELOC-MODE: Disassembly of section .text.cold:
+
+# RELOC-MODE: <foo.cold.0>:
+# RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
+# RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} ret
+
+# RELOC-MODE: <bar.cold.0>:
+# RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x2 // =2
+# RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} ret
+
+# RELOC-MODE: <baz.cold.0>:
+# RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} mov x0, #0x3 // =3
+# RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} ret
+
+
+# NON-RELOC-MODE: Disassembly of section .text:
+
+# NON-RELOC-MODE: <foo>:
+# NON-RELOC-MODE-NEXT: {{.*}} cblt x0, #0x1, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b 0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
+
+# NON-RELOC-MODE: <bar>:
+# NON-RELOC-MODE-NEXT: {{.*}} cbhi x0, #0x0, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b 0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
+
+# NON-RELOC-MODE: <baz>:
+# NON-RELOC-MODE-NEXT: {{.*}} cbgt x1, x0, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b 0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
+
+# NON-RELOC-MODE: Disassembly of section .bolt.text:
+
+# NON-RELOC-MODE: <foo.cold.0>:
+# NON-RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
+# NON-RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} ret
+
+# NON-RELOC-MODE: <bar.cold.0>:
+# NON-RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} mov x0, #0x2 // =2
+# NON-RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} ret
+
+# NON-RELOC-MODE: <baz.cold.0>:
+# NON-RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} mov x0, #0x3 // =3
+# NON-RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} ret
>From be76581e2d18a72032eee44eecb8813eba06d3c0 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Thu, 8 Jan 2026 13:10:56 +0000
Subject: [PATCH 02/13] clang format
---
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 108 ++++++++++++------
1 file changed, 72 insertions(+), 36 deletions(-)
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 1d9e4cbce118a..eeee48d11f067 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1727,42 +1727,78 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
case AArch64::CBZX: return AArch64::CBNZX;
case AArch64::CBNZW: return AArch64::CBZW;
case AArch64::CBNZX: return AArch64::CBZX;
- case AArch64::CBGTWrr: return AArch64::CBGEWrr;
- case AArch64::CBGTXrr: return AArch64::CBGEXrr;
- case AArch64::CBGEWrr: return AArch64::CBGTWrr;
- case AArch64::CBGEXrr: return AArch64::CBGTXrr;
- case AArch64::CBHIWrr: return AArch64::CBHSWrr;
- case AArch64::CBHIXrr: return AArch64::CBHSXrr;
- case AArch64::CBHSWrr: return AArch64::CBHIWrr;
- case AArch64::CBHSXrr: return AArch64::CBHIXrr;
- case AArch64::CBEQWrr: return AArch64::CBNEWrr;
- case AArch64::CBEQXrr: return AArch64::CBNEXrr;
- case AArch64::CBNEWrr: return AArch64::CBEQWrr;
- case AArch64::CBNEXrr: return AArch64::CBEQXrr;
- case AArch64::CBHGTWrr: return AArch64::CBHGEWrr;
- case AArch64::CBHGEWrr: return AArch64::CBHGTWrr;
- case AArch64::CBHHIWrr: return AArch64::CBHHSWrr;
- case AArch64::CBHHSWrr: return AArch64::CBHHIWrr;
- case AArch64::CBHEQWrr: return AArch64::CBHNEWrr;
- case AArch64::CBHNEWrr: return AArch64::CBHEQWrr;
- case AArch64::CBBGTWrr: return AArch64::CBBGEWrr;
- case AArch64::CBBGEWrr: return AArch64::CBBGTWrr;
- case AArch64::CBBHIWrr: return AArch64::CBBHSWrr;
- case AArch64::CBBHSWrr: return AArch64::CBBHIWrr;
- case AArch64::CBBEQWrr: return AArch64::CBBNEWrr;
- case AArch64::CBBNEWrr: return AArch64::CBBEQWrr;
- case AArch64::CBGTWri: return AArch64::CBLTWri;
- case AArch64::CBGTXri: return AArch64::CBLTXri;
- case AArch64::CBLTWri: return AArch64::CBGTWri;
- case AArch64::CBLTXri: return AArch64::CBGTXri;
- case AArch64::CBHIWri: return AArch64::CBLOWri;
- case AArch64::CBHIXri: return AArch64::CBLOXri;
- case AArch64::CBLOWri: return AArch64::CBHIWri;
- case AArch64::CBLOXri: return AArch64::CBHIXri;
- case AArch64::CBEQWri: return AArch64::CBNEWri;
- case AArch64::CBEQXri: return AArch64::CBNEXri;
- case AArch64::CBNEWri: return AArch64::CBEQWri;
- case AArch64::CBNEXri: return AArch64::CBEQXri;
+ case AArch64::CBGTWrr:
+ return AArch64::CBGEWrr;
+ case AArch64::CBGTXrr:
+ return AArch64::CBGEXrr;
+ case AArch64::CBGEWrr:
+ return AArch64::CBGTWrr;
+ case AArch64::CBGEXrr:
+ return AArch64::CBGTXrr;
+ case AArch64::CBHIWrr:
+ return AArch64::CBHSWrr;
+ case AArch64::CBHIXrr:
+ return AArch64::CBHSXrr;
+ case AArch64::CBHSWrr:
+ return AArch64::CBHIWrr;
+ case AArch64::CBHSXrr:
+ return AArch64::CBHIXrr;
+ case AArch64::CBEQWrr:
+ return AArch64::CBNEWrr;
+ case AArch64::CBEQXrr:
+ return AArch64::CBNEXrr;
+ case AArch64::CBNEWrr:
+ return AArch64::CBEQWrr;
+ case AArch64::CBNEXrr:
+ return AArch64::CBEQXrr;
+ case AArch64::CBHGTWrr:
+ return AArch64::CBHGEWrr;
+ case AArch64::CBHGEWrr:
+ return AArch64::CBHGTWrr;
+ case AArch64::CBHHIWrr:
+ return AArch64::CBHHSWrr;
+ case AArch64::CBHHSWrr:
+ return AArch64::CBHHIWrr;
+ case AArch64::CBHEQWrr:
+ return AArch64::CBHNEWrr;
+ case AArch64::CBHNEWrr:
+ return AArch64::CBHEQWrr;
+ case AArch64::CBBGTWrr:
+ return AArch64::CBBGEWrr;
+ case AArch64::CBBGEWrr:
+ return AArch64::CBBGTWrr;
+ case AArch64::CBBHIWrr:
+ return AArch64::CBBHSWrr;
+ case AArch64::CBBHSWrr:
+ return AArch64::CBBHIWrr;
+ case AArch64::CBBEQWrr:
+ return AArch64::CBBNEWrr;
+ case AArch64::CBBNEWrr:
+ return AArch64::CBBEQWrr;
+ case AArch64::CBGTWri:
+ return AArch64::CBLTWri;
+ case AArch64::CBGTXri:
+ return AArch64::CBLTXri;
+ case AArch64::CBLTWri:
+ return AArch64::CBGTWri;
+ case AArch64::CBLTXri:
+ return AArch64::CBGTXri;
+ case AArch64::CBHIWri:
+ return AArch64::CBLOWri;
+ case AArch64::CBHIXri:
+ return AArch64::CBLOXri;
+ case AArch64::CBLOWri:
+ return AArch64::CBHIWri;
+ case AArch64::CBLOXri:
+ return AArch64::CBHIXri;
+ case AArch64::CBEQWri:
+ return AArch64::CBNEWri;
+ case AArch64::CBEQXri:
+ return AArch64::CBNEXri;
+ case AArch64::CBNEWri:
+ return AArch64::CBEQWri;
+ case AArch64::CBNEXri:
+ return AArch64::CBEQXri;
}
}
>From 4d1f1d88b5994a610b0a26e854f61253ccff9278 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Mon, 12 Jan 2026 13:09:56 +0000
Subject: [PATCH 03/13] Revert "clang format" and escape formatting switch
cases.
This reverts commit be76581e2d18a72032eee44eecb8813eba06d3c0.
---
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 112 ++++++------------
1 file changed, 39 insertions(+), 73 deletions(-)
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index eeee48d11f067..6617b45af6334 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1715,6 +1715,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
unsigned getInvertedBranchOpcode(unsigned Opcode) const {
+ // clang-format off
switch (Opcode) {
default:
llvm_unreachable("Failed to invert branch opcode");
@@ -1727,79 +1728,44 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
case AArch64::CBZX: return AArch64::CBNZX;
case AArch64::CBNZW: return AArch64::CBZW;
case AArch64::CBNZX: return AArch64::CBZX;
- case AArch64::CBGTWrr:
- return AArch64::CBGEWrr;
- case AArch64::CBGTXrr:
- return AArch64::CBGEXrr;
- case AArch64::CBGEWrr:
- return AArch64::CBGTWrr;
- case AArch64::CBGEXrr:
- return AArch64::CBGTXrr;
- case AArch64::CBHIWrr:
- return AArch64::CBHSWrr;
- case AArch64::CBHIXrr:
- return AArch64::CBHSXrr;
- case AArch64::CBHSWrr:
- return AArch64::CBHIWrr;
- case AArch64::CBHSXrr:
- return AArch64::CBHIXrr;
- case AArch64::CBEQWrr:
- return AArch64::CBNEWrr;
- case AArch64::CBEQXrr:
- return AArch64::CBNEXrr;
- case AArch64::CBNEWrr:
- return AArch64::CBEQWrr;
- case AArch64::CBNEXrr:
- return AArch64::CBEQXrr;
- case AArch64::CBHGTWrr:
- return AArch64::CBHGEWrr;
- case AArch64::CBHGEWrr:
- return AArch64::CBHGTWrr;
- case AArch64::CBHHIWrr:
- return AArch64::CBHHSWrr;
- case AArch64::CBHHSWrr:
- return AArch64::CBHHIWrr;
- case AArch64::CBHEQWrr:
- return AArch64::CBHNEWrr;
- case AArch64::CBHNEWrr:
- return AArch64::CBHEQWrr;
- case AArch64::CBBGTWrr:
- return AArch64::CBBGEWrr;
- case AArch64::CBBGEWrr:
- return AArch64::CBBGTWrr;
- case AArch64::CBBHIWrr:
- return AArch64::CBBHSWrr;
- case AArch64::CBBHSWrr:
- return AArch64::CBBHIWrr;
- case AArch64::CBBEQWrr:
- return AArch64::CBBNEWrr;
- case AArch64::CBBNEWrr:
- return AArch64::CBBEQWrr;
- case AArch64::CBGTWri:
- return AArch64::CBLTWri;
- case AArch64::CBGTXri:
- return AArch64::CBLTXri;
- case AArch64::CBLTWri:
- return AArch64::CBGTWri;
- case AArch64::CBLTXri:
- return AArch64::CBGTXri;
- case AArch64::CBHIWri:
- return AArch64::CBLOWri;
- case AArch64::CBHIXri:
- return AArch64::CBLOXri;
- case AArch64::CBLOWri:
- return AArch64::CBHIWri;
- case AArch64::CBLOXri:
- return AArch64::CBHIXri;
- case AArch64::CBEQWri:
- return AArch64::CBNEWri;
- case AArch64::CBEQXri:
- return AArch64::CBNEXri;
- case AArch64::CBNEWri:
- return AArch64::CBEQWri;
- case AArch64::CBNEXri:
- return AArch64::CBEQXri;
- }
+ case AArch64::CBGTWrr: return AArch64::CBGEWrr;
+ case AArch64::CBGTXrr: return AArch64::CBGEXrr;
+ case AArch64::CBGEWrr: return AArch64::CBGTWrr;
+ case AArch64::CBGEXrr: return AArch64::CBGTXrr;
+ case AArch64::CBHIWrr: return AArch64::CBHSWrr;
+ case AArch64::CBHIXrr: return AArch64::CBHSXrr;
+ case AArch64::CBHSWrr: return AArch64::CBHIWrr;
+ case AArch64::CBHSXrr: return AArch64::CBHIXrr;
+ case AArch64::CBEQWrr: return AArch64::CBNEWrr;
+ case AArch64::CBEQXrr: return AArch64::CBNEXrr;
+ case AArch64::CBNEWrr: return AArch64::CBEQWrr;
+ case AArch64::CBNEXrr: return AArch64::CBEQXrr;
+ case AArch64::CBHGTWrr: return AArch64::CBHGEWrr;
+ case AArch64::CBHGEWrr: return AArch64::CBHGTWrr;
+ case AArch64::CBHHIWrr: return AArch64::CBHHSWrr;
+ case AArch64::CBHHSWrr: return AArch64::CBHHIWrr;
+ case AArch64::CBHEQWrr: return AArch64::CBHNEWrr;
+ case AArch64::CBHNEWrr: return AArch64::CBHEQWrr;
+ case AArch64::CBBGTWrr: return AArch64::CBBGEWrr;
+ case AArch64::CBBGEWrr: return AArch64::CBBGTWrr;
+ case AArch64::CBBHIWrr: return AArch64::CBBHSWrr;
+ case AArch64::CBBHSWrr: return AArch64::CBBHIWrr;
+ case AArch64::CBBEQWrr: return AArch64::CBBNEWrr;
+ case AArch64::CBBNEWrr: return AArch64::CBBEQWrr;
+ case AArch64::CBGTWri: return AArch64::CBLTWri;
+ case AArch64::CBGTXri: return AArch64::CBLTXri;
+ case AArch64::CBLTWri: return AArch64::CBGTWri;
+ case AArch64::CBLTXri: return AArch64::CBGTXri;
+ case AArch64::CBHIWri: return AArch64::CBLOWri;
+ case AArch64::CBHIXri: return AArch64::CBLOXri;
+ case AArch64::CBLOWri: return AArch64::CBHIWri;
+ case AArch64::CBLOXri: return AArch64::CBHIXri;
+ case AArch64::CBEQWri: return AArch64::CBNEWri;
+ case AArch64::CBEQXri: return AArch64::CBNEXri;
+ case AArch64::CBNEWri: return AArch64::CBEQWri;
+ case AArch64::CBNEXri: return AArch64::CBEQXri;
+ }
+ // clang-format on
}
unsigned getCondCode(const MCInst &Inst) const override {
>From 2a8a0a2a27b81726e6c926540fdbd6197195cc24 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Thu, 15 Jan 2026 17:53:43 +0000
Subject: [PATCH 04/13] Fix inverted branch opcodes.
---
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 175 ++++++++++--------
1 file changed, 98 insertions(+), 77 deletions(-)
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 6617b45af6334..f20c0a4a62bf9 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -717,44 +717,48 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
bool isCompAndBranch(const MCInst &Inst) const {
- const unsigned opcode = Inst.getOpcode();
- switch (opcode) {
- case AArch64::CBBEQWrr:
- case AArch64::CBBGEWrr:
- case AArch64::CBBGTWrr:
- case AArch64::CBBHIWrr:
- case AArch64::CBBHSWrr:
- case AArch64::CBBNEWrr:
- case AArch64::CBHEQWrr:
- case AArch64::CBHGEWrr:
- case AArch64::CBHGTWrr:
- case AArch64::CBHHIWrr:
- case AArch64::CBHHSWrr:
- case AArch64::CBHNEWrr:
- case AArch64::CBHIWrr:
- case AArch64::CBHIXrr:
- case AArch64::CBHSWrr:
- case AArch64::CBHSXrr:
- case AArch64::CBNEWrr:
- case AArch64::CBNEXrr:
- case AArch64::CBEQWrr:
- case AArch64::CBEQXrr:
- case AArch64::CBGEWrr:
- case AArch64::CBGEXrr:
- case AArch64::CBGTWrr:
- case AArch64::CBGTXrr:
- case AArch64::CBEQWri:
- case AArch64::CBEQXri:
+ const unsigned Opcode = Inst.getOpcode();
+ switch (Opcode) {
+ // Compare register with immediate and branch.
case AArch64::CBGTWri:
case AArch64::CBGTXri:
+ case AArch64::CBLTWri:
+ case AArch64::CBLTXri:
case AArch64::CBHIWri:
case AArch64::CBHIXri:
case AArch64::CBLOWri:
case AArch64::CBLOXri:
- case AArch64::CBLTWri:
- case AArch64::CBLTXri:
+ case AArch64::CBEQWri:
+ case AArch64::CBEQXri:
case AArch64::CBNEWri:
case AArch64::CBNEXri:
+ // Compare registers and branch.
+ case AArch64::CBGTWrr:
+ case AArch64::CBGTXrr:
+ case AArch64::CBGEWrr:
+ case AArch64::CBGEXrr:
+ case AArch64::CBHIWrr:
+ case AArch64::CBHIXrr:
+ case AArch64::CBHSWrr:
+ case AArch64::CBHSXrr:
+ case AArch64::CBEQWrr:
+ case AArch64::CBEQXrr:
+ case AArch64::CBNEWrr:
+ case AArch64::CBNEXrr:
+ // Compare bytes and branch.
+ case AArch64::CBBGTWrr:
+ case AArch64::CBBGEWrr:
+ case AArch64::CBBHIWrr:
+ case AArch64::CBBHSWrr:
+ case AArch64::CBBEQWrr:
+ case AArch64::CBBNEWrr:
+ // Compare halfwords and branch.
+ case AArch64::CBHGTWrr:
+ case AArch64::CBHGEWrr:
+ case AArch64::CBHHIWrr:
+ case AArch64::CBHHSWrr:
+ case AArch64::CBHEQWrr:
+ case AArch64::CBHNEWrr:
return true;
default:
return false;
@@ -1728,42 +1732,46 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
case AArch64::CBZX: return AArch64::CBNZX;
case AArch64::CBNZW: return AArch64::CBZW;
case AArch64::CBNZX: return AArch64::CBZX;
- case AArch64::CBGTWrr: return AArch64::CBGEWrr;
- case AArch64::CBGTXrr: return AArch64::CBGEXrr;
- case AArch64::CBGEWrr: return AArch64::CBGTWrr;
- case AArch64::CBGEXrr: return AArch64::CBGTXrr;
- case AArch64::CBHIWrr: return AArch64::CBHSWrr;
- case AArch64::CBHIXrr: return AArch64::CBHSXrr;
- case AArch64::CBHSWrr: return AArch64::CBHIWrr;
- case AArch64::CBHSXrr: return AArch64::CBHIXrr;
+ // Compare register with immediate and branch.
+ case AArch64::CBGTWri: return AArch64::CBLTWri; // +1
+ case AArch64::CBGTXri: return AArch64::CBLTXri; // +1
+ case AArch64::CBLTWri: return AArch64::CBGTWri; // -1
+ case AArch64::CBLTXri: return AArch64::CBGTXri; // -1
+ case AArch64::CBHIWri: return AArch64::CBLOWri; // +1
+ case AArch64::CBHIXri: return AArch64::CBLOXri; // +1
+ case AArch64::CBLOWri: return AArch64::CBHIWri; // -1
+ case AArch64::CBLOXri: return AArch64::CBHIXri; // -1
+ case AArch64::CBEQWri: return AArch64::CBNEWri;
+ case AArch64::CBEQXri: return AArch64::CBNEXri;
+ case AArch64::CBNEWri: return AArch64::CBEQWri;
+ case AArch64::CBNEXri: return AArch64::CBEQXri;
+ // Compare registers and branch.
+ case AArch64::CBGTWrr: return AArch64::CBGEWrr; // swap
+ case AArch64::CBGTXrr: return AArch64::CBGEXrr; // swap
+ case AArch64::CBGEWrr: return AArch64::CBGTWrr; // swap
+ case AArch64::CBGEXrr: return AArch64::CBGTXrr; // swap
+ case AArch64::CBHIWrr: return AArch64::CBHSWrr; // swap
+ case AArch64::CBHIXrr: return AArch64::CBHSXrr; // swap
+ case AArch64::CBHSWrr: return AArch64::CBHIWrr; // swap
+ case AArch64::CBHSXrr: return AArch64::CBHIXrr; // swap
case AArch64::CBEQWrr: return AArch64::CBNEWrr;
case AArch64::CBEQXrr: return AArch64::CBNEXrr;
case AArch64::CBNEWrr: return AArch64::CBEQWrr;
case AArch64::CBNEXrr: return AArch64::CBEQXrr;
- case AArch64::CBHGTWrr: return AArch64::CBHGEWrr;
- case AArch64::CBHGEWrr: return AArch64::CBHGTWrr;
- case AArch64::CBHHIWrr: return AArch64::CBHHSWrr;
- case AArch64::CBHHSWrr: return AArch64::CBHHIWrr;
- case AArch64::CBHEQWrr: return AArch64::CBHNEWrr;
- case AArch64::CBHNEWrr: return AArch64::CBHEQWrr;
- case AArch64::CBBGTWrr: return AArch64::CBBGEWrr;
- case AArch64::CBBGEWrr: return AArch64::CBBGTWrr;
- case AArch64::CBBHIWrr: return AArch64::CBBHSWrr;
- case AArch64::CBBHSWrr: return AArch64::CBBHIWrr;
+ // Compare bytes and branch.
+ case AArch64::CBBGTWrr: return AArch64::CBBGEWrr; // swap
+ case AArch64::CBBGEWrr: return AArch64::CBBGTWrr; // swap
+ case AArch64::CBBHIWrr: return AArch64::CBBHSWrr; // swap
+ case AArch64::CBBHSWrr: return AArch64::CBBHIWrr; // swap
case AArch64::CBBEQWrr: return AArch64::CBBNEWrr;
case AArch64::CBBNEWrr: return AArch64::CBBEQWrr;
- case AArch64::CBGTWri: return AArch64::CBLTWri;
- case AArch64::CBGTXri: return AArch64::CBLTXri;
- case AArch64::CBLTWri: return AArch64::CBGTWri;
- case AArch64::CBLTXri: return AArch64::CBGTXri;
- case AArch64::CBHIWri: return AArch64::CBLOWri;
- case AArch64::CBHIXri: return AArch64::CBLOXri;
- case AArch64::CBLOWri: return AArch64::CBHIWri;
- case AArch64::CBLOXri: return AArch64::CBHIXri;
- case AArch64::CBEQWri: return AArch64::CBNEWri;
- case AArch64::CBEQXri: return AArch64::CBNEXri;
- case AArch64::CBNEWri: return AArch64::CBEQWri;
- case AArch64::CBNEXri: return AArch64::CBEQXri;
+ // Compare halfwords and branch.
+ case AArch64::CBHGTWrr: return AArch64::CBHGEWrr; // swap
+ case AArch64::CBHGEWrr: return AArch64::CBHGTWrr; // swap
+ case AArch64::CBHHIWrr: return AArch64::CBHHSWrr; // swap
+ case AArch64::CBHHSWrr: return AArch64::CBHHIWrr; // swap
+ case AArch64::CBHEQWrr: return AArch64::CBHNEWrr;
+ case AArch64::CBHNEWrr: return AArch64::CBHEQWrr;
}
// clang-format on
}
@@ -1789,6 +1797,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
switch (Opcode) {
default:
return false;
+ // Compare registers and branch.
case AArch64::CBGTWrr:
case AArch64::CBGTXrr:
case AArch64::CBGEWrr:
@@ -1797,14 +1806,16 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
case AArch64::CBHIXrr:
case AArch64::CBHSWrr:
case AArch64::CBHSXrr:
- case AArch64::CBHGTWrr:
- case AArch64::CBHGEWrr:
- case AArch64::CBHHIWrr:
- case AArch64::CBHHSWrr:
+ // Compare bytes and branch.
case AArch64::CBBGTWrr:
case AArch64::CBBGEWrr:
case AArch64::CBBHIWrr:
case AArch64::CBBHSWrr:
+ // Compare halfwords and branch.
+ case AArch64::CBHGTWrr:
+ case AArch64::CBHGEWrr:
+ case AArch64::CBHHIWrr:
+ case AArch64::CBHHSWrr:
return true;
}
}
@@ -1813,10 +1824,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
switch (Opcode) {
default:
return false;
- case AArch64::CBLTWri:
- case AArch64::CBLTXri:
- case AArch64::CBLOWri:
- case AArch64::CBLOXri:
+ case AArch64::CBGTWri:
+ case AArch64::CBGTXri:
+ case AArch64::CBHIWri:
+ case AArch64::CBHIXri:
return true;
}
}
@@ -1825,10 +1836,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
switch (Opcode) {
default:
return false;
- case AArch64::CBGTWri:
- case AArch64::CBGTXri:
- case AArch64::CBHIWri:
- case AArch64::CBHIXri:
+ case AArch64::CBLTWri:
+ case AArch64::CBLTXri:
+ case AArch64::CBLOWri:
+ case AArch64::CBLOXri:
return true;
}
}
@@ -1836,14 +1847,24 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
MCContext *Ctx) const override {
if (isTB(Inst) || isCB(Inst) || isCompAndBranch(Inst)) {
- if (needsRegSwap(Inst.getOpcode()))
+ unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
+ Inst.setOpcode(InvertedOpcode);
+ assert(Inst.getOpcode() != 0 && "Invalid branch instruction");
+ // The FEAT_CMPBR compare-and-branch instructions cannot encode all
+ // the possible condition codes, therefore we either have to adjust
+ // the immediate value by +-1, or to swap the register operands
+ // when reversing the branch condition.
+ if (needsRegSwap(InvertedOpcode))
std::swap(Inst.getOperand(0), Inst.getOperand(1));
- else if (needsImmDec(Inst.getOpcode()))
+ else if (needsImmDec(InvertedOpcode)) {
+ assert(Inst.getOperand(1).getImm() > 0 &&
+ "compare-and-branch immediate operand out-of-bounds");
Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() - 1);
- else if (needsImmInc(Inst.getOpcode()))
+ } else if (needsImmInc(InvertedOpcode)) {
+ assert(Inst.getOperand(1).getImm() < 63 &&
+ "compare-and-branch immediate operand out-of-bounds");
Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() + 1);
- Inst.setOpcode(getInvertedBranchOpcode(Inst.getOpcode()));
- assert(Inst.getOpcode() != 0 && "Invalid branch instruction");
+ }
} else if (Inst.getOpcode() == AArch64::Bcc) {
Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode(
static_cast<AArch64CC::CondCode>(Inst.getOperand(0).getImm())));
>From 742359665fcba62b22245819ed4e7effc620ac5b Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Fri, 16 Jan 2026 11:01:44 +0000
Subject: [PATCH 05/13] Handle irreversible branches.
---
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 22 +++++++++-----
bolt/test/AArch64/compare-and-branch.S | 30 +++++++++++++++++++
2 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index f20c0a4a62bf9..fe6b1065243e9 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1844,8 +1844,21 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
}
+ bool isReversibleBranch(const MCInst &Inst) const override {
+ if (isCompAndBranch(Inst)) {
+ unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
+ if (needsImmDec(InvertedOpcode))
+ return Inst.getOperand(1).getImm() > 0;
+ if (needsImmInc(InvertedOpcode))
+ return Inst.getOperand(1).getImm() < 63;
+ }
+ return MCPlusBuilder::isReversibleBranch(Inst);
+ }
+
void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
MCContext *Ctx) const override {
+ assert(isReversibleBranch(Inst) && "Cannot reverse branch");
+
if (isTB(Inst) || isCB(Inst) || isCompAndBranch(Inst)) {
unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
Inst.setOpcode(InvertedOpcode);
@@ -1856,15 +1869,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
// when reversing the branch condition.
if (needsRegSwap(InvertedOpcode))
std::swap(Inst.getOperand(0), Inst.getOperand(1));
- else if (needsImmDec(InvertedOpcode)) {
- assert(Inst.getOperand(1).getImm() > 0 &&
- "compare-and-branch immediate operand out-of-bounds");
+ else if (needsImmDec(InvertedOpcode))
Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() - 1);
- } else if (needsImmInc(InvertedOpcode)) {
- assert(Inst.getOperand(1).getImm() < 63 &&
- "compare-and-branch immediate operand out-of-bounds");
+ else if (needsImmInc(InvertedOpcode))
Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() + 1);
- }
} else if (Inst.getOpcode() == AArch64::Bcc) {
Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode(
static_cast<AArch64CC::CondCode>(Inst.getOperand(0).getImm())));
diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
index 374541c9bd6a0..6cb69da487949 100644
--- a/bolt/test/AArch64/compare-and-branch.S
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -50,6 +50,16 @@ baz:
.Lcold_baz:
ret
+ .globl irreversible
+ .type irreversible, %function
+irreversible:
+# FDATA: 1 irreversible #.entry_irreversible# 10
+.entry_irreversible:
+ cbgt x0, #63, .Lcold_irreversible
+ mov x0, #4
+.Lcold_irreversible:
+ ret
+
## Force relocation mode.
.if RELOCATION_MODE
.reloc 0, R_AARCH64_NONE
@@ -80,6 +90,12 @@ baz:
# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
# RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b 0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE: <irreversible>:
+# RELOC-MODE-NEXT: {{.*}} cbgt x0, #0x3f, 0x[[ADDR9:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b 0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
+
+
# RELOC-MODE: Disassembly of section .text.cold:
# RELOC-MODE: <foo.cold.0>:
@@ -94,6 +110,10 @@ baz:
# RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} mov x0, #0x3 // =3
# RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} ret
+# RELOC-MODE: <irreversible.cold.0>:
+# RELOC-MODE-NEXT: [[ADDR10]]: {{.*}} mov x0, #0x4 // =4
+# RELOC-MODE-NEXT: [[ADDR11]]: {{.*}} ret
+
# NON-RELOC-MODE: Disassembly of section .text:
@@ -112,6 +132,12 @@ baz:
# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
# NON-RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b 0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE: <irreversible>:
+# NON-RELOC-MODE-NEXT: {{.*}} cbgt x0, #0x3f, 0x[[ADDR9:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b 0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
+
+
# NON-RELOC-MODE: Disassembly of section .bolt.text:
# NON-RELOC-MODE: <foo.cold.0>:
@@ -125,3 +151,7 @@ baz:
# NON-RELOC-MODE: <baz.cold.0>:
# NON-RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} mov x0, #0x3 // =3
# NON-RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} ret
+
+# NON-RELOC-MODE: <irreversible.cold.0>:
+# NON-RELOC-MODE-NEXT: [[ADDR10]]: {{.*}} mov x0, #0x4 // =4
+# NON-RELOC-MODE-NEXT: [[ADDR11]]: {{.*}} ret
>From 61e7719338e3dc39fd8774b1f68b29601903189f Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Fri, 16 Jan 2026 11:13:54 +0000
Subject: [PATCH 06/13] Do not early exit isReversibleBranch before calling
parent.
---
bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index fe6b1065243e9..207313912fbae 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1847,10 +1847,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
bool isReversibleBranch(const MCInst &Inst) const override {
if (isCompAndBranch(Inst)) {
unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
- if (needsImmDec(InvertedOpcode))
- return Inst.getOperand(1).getImm() > 0;
- if (needsImmInc(InvertedOpcode))
- return Inst.getOperand(1).getImm() < 63;
+ if (needsImmDec(InvertedOpcode) && Inst.getOperand(1).getImm() <= 0)
+ return false;
+ if (needsImmInc(InvertedOpcode) && Inst.getOperand(1).getImm() >= 63)
+ return false;
}
return MCPlusBuilder::isReversibleBranch(Inst);
}
>From 5d77569de15ddfe3ca6b77b65c2b9a54bfc9f7d0 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Thu, 29 Jan 2026 14:08:36 +0000
Subject: [PATCH 07/13] replace assertion with bolt-error inside
reverseBranchCondition
---
bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index db07775cf8ecf..8a414e8ceba18 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1921,7 +1921,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
MCContext *Ctx) const override {
- assert(isReversibleBranch(Inst) && "Cannot reverse branch");
+ if (!isReversibleBranch(Inst)) {
+ errs() << "BOLT-ERROR: Cannot reverse branch " << Inst << "\n";
+ exit(1);
+ }
if (isTB(Inst) || isCB(Inst) || isCompAndBranch(Inst)) {
unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
>From ab14a60c4fc8589f379c4a5271675070b3149b11 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Thu, 29 Jan 2026 16:08:06 +0000
Subject: [PATCH 08/13] Add large function to make sure long distance jump to
cold section works after splitting.
---
bolt/test/AArch64/compare-and-branch.S | 27 +++++++++++++++++++++++---
1 file changed, 24 insertions(+), 3 deletions(-)
diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
index 6cb69da487949..0a52d8b292a17 100644
--- a/bolt/test/AArch64/compare-and-branch.S
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -7,13 +7,13 @@
# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=1
# RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --keep-nops --compact-code-model
# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-RELOCS
# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELOC-MODE
# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=0
# RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --keep-nops
# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-NO-RELOCS
# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NON-RELOC-MODE
@@ -25,6 +25,7 @@ foo:
## Test immediate increment when inverting the branch.
cbgt x0, #0, .Lcold_foo
mov x0, #1
+ bl large_function
.Lcold_foo:
ret
@@ -60,6 +61,16 @@ irreversible:
.Lcold_irreversible:
ret
+ .globl large_function
+ .type large_function, %function
+large_function:
+# FDATA: 1 large_function #.entry_large_function# 10
+.entry_large_function:
+ .rept 300000
+ nop
+ .endr
+ ret
+
## Force relocation mode.
.if RELOCATION_MODE
.reloc 0, R_AARCH64_NONE
@@ -67,9 +78,11 @@ irreversible:
# CHECK-RELOCS: R_AARCH64_NONE *ABS*
+# CHECK-RELOCS: R_AARCH64_CALL26 large_function
# CHECK-RELOCS-NOT: R_AARCH64_
+# CHECK-NO-RELOCS: R_AARCH64_CALL26 large_function
# CHECK-NO-RELOCS-NOT: R_AARCH64_
@@ -95,11 +108,15 @@ irreversible:
# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
# RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b 0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE: <large_function>:
+# RELOC-MODE-NEXT: [[ADDR12:[0-9a-f]+]]: {{.*}} nop
+# RELOC-MODE: {{.*}} ret
# RELOC-MODE: Disassembly of section .text.cold:
# RELOC-MODE: <foo.cold.0>:
# RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
+# RELOC-MODE-NEXT: {{.*}} bl 0x[[ADDR12]] <large_function>
# RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} ret
# RELOC-MODE: <bar.cold.0>:
@@ -137,11 +154,15 @@ irreversible:
# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
# NON-RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b 0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE: <large_function>:
+# NON-RELOC-MODE-NEXT: [[ADDR12:[0-9a-f]+]]: {{.*}} nop
+# NON-RELOC-MODE: {{.*}} ret
-# NON-RELOC-MODE: Disassembly of section .bolt.text:
+# NON-RELOC-MODE: Disassembly of section .text.cold:
# NON-RELOC-MODE: <foo.cold.0>:
# NON-RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
+# NON-RELOC-MODE-NEXT: {{.*}} bl 0x[[ADDR12]] <large_function>
# NON-RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} ret
# NON-RELOC-MODE: <bar.cold.0>:
>From 0e24094b997a34a0472f25979fb9d9079a848b25 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Tue, 3 Feb 2026 10:49:00 +0000
Subject: [PATCH 09/13] Revert "Add large function to make sure long distance
jump to cold section works after splitting."
This reverts commit ab14a60c4fc8589f379c4a5271675070b3149b11.
---
bolt/test/AArch64/compare-and-branch.S | 27 +++-----------------------
1 file changed, 3 insertions(+), 24 deletions(-)
diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
index 0a52d8b292a17..6cb69da487949 100644
--- a/bolt/test/AArch64/compare-and-branch.S
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -7,13 +7,13 @@
# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=1
# RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --keep-nops --compact-code-model
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model
# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-RELOCS
# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELOC-MODE
# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=0
# RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --keep-nops
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-NO-RELOCS
# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NON-RELOC-MODE
@@ -25,7 +25,6 @@ foo:
## Test immediate increment when inverting the branch.
cbgt x0, #0, .Lcold_foo
mov x0, #1
- bl large_function
.Lcold_foo:
ret
@@ -61,16 +60,6 @@ irreversible:
.Lcold_irreversible:
ret
- .globl large_function
- .type large_function, %function
-large_function:
-# FDATA: 1 large_function #.entry_large_function# 10
-.entry_large_function:
- .rept 300000
- nop
- .endr
- ret
-
## Force relocation mode.
.if RELOCATION_MODE
.reloc 0, R_AARCH64_NONE
@@ -78,11 +67,9 @@ large_function:
# CHECK-RELOCS: R_AARCH64_NONE *ABS*
-# CHECK-RELOCS: R_AARCH64_CALL26 large_function
# CHECK-RELOCS-NOT: R_AARCH64_
-# CHECK-NO-RELOCS: R_AARCH64_CALL26 large_function
# CHECK-NO-RELOCS-NOT: R_AARCH64_
@@ -108,15 +95,11 @@ large_function:
# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
# RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b 0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE: <large_function>:
-# RELOC-MODE-NEXT: [[ADDR12:[0-9a-f]+]]: {{.*}} nop
-# RELOC-MODE: {{.*}} ret
# RELOC-MODE: Disassembly of section .text.cold:
# RELOC-MODE: <foo.cold.0>:
# RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
-# RELOC-MODE-NEXT: {{.*}} bl 0x[[ADDR12]] <large_function>
# RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} ret
# RELOC-MODE: <bar.cold.0>:
@@ -154,15 +137,11 @@ large_function:
# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
# NON-RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b 0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE: <large_function>:
-# NON-RELOC-MODE-NEXT: [[ADDR12:[0-9a-f]+]]: {{.*}} nop
-# NON-RELOC-MODE: {{.*}} ret
-# NON-RELOC-MODE: Disassembly of section .text.cold:
+# NON-RELOC-MODE: Disassembly of section .bolt.text:
# NON-RELOC-MODE: <foo.cold.0>:
# NON-RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
-# NON-RELOC-MODE-NEXT: {{.*}} bl 0x[[ADDR12]] <large_function>
# NON-RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} ret
# NON-RELOC-MODE: <bar.cold.0>:
>From 8f9188f0e6899bd5e7b58e66c6f070242a6caeb8 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Tue, 3 Feb 2026 10:54:30 +0000
Subject: [PATCH 10/13] rename test functions
---
bolt/test/AArch64/compare-and-branch.S | 68 +++++++++++++-------------
1 file changed, 34 insertions(+), 34 deletions(-)
diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
index 6cb69da487949..e05f607879331 100644
--- a/bolt/test/AArch64/compare-and-branch.S
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -17,40 +17,40 @@
# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-NO-RELOCS
# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NON-RELOC-MODE
- .globl foo
- .type foo, %function
-foo:
-# FDATA: 1 foo #.entry_foo# 10
-.entry_foo:
+ .globl immediate_increment
+ .type immediate_increment, %function
+immediate_increment:
+# FDATA: 1 immediate_increment #.entry_immediate_increment# 10
+.entry_immediate_increment:
## Test immediate increment when inverting the branch.
- cbgt x0, #0, .Lcold_foo
+ cbgt x0, #0, .Lcold_immediate_increment
mov x0, #1
-.Lcold_foo:
+.Lcold_immediate_increment:
ret
- .globl bar
- .type bar, %function
-bar:
-# FDATA: 1 bar #.entry_bar# 10
-.entry_bar:
+ .globl immediate_decrement
+ .type immediate_decrement, %function
+immediate_decrement:
+# FDATA: 1 immediate_decrement #.entry_immediate_decrement# 10
+.entry_immediate_decrement:
## Test immediate decrement when inverting the branch.
- cblo x0, #1, .Lcold_bar
+ cblo x0, #1, .Lcold_immediate_decrement
mov x0, #2
-.Lcold_bar:
+.Lcold_immediate_decrement:
ret
- .globl baz
- .type baz, %function
-baz:
-# FDATA: 1 baz #.entry_baz# 10
-.entry_baz:
+ .globl register_swap
+ .type register_swap, %function
+register_swap:
+# FDATA: 1 register_swap #.entry_register_swap# 10
+.entry_register_swap:
## Test register swap when inverting the branch.
- cbge x0, x1, .Lcold_baz
+ cbge x0, x1, .Lcold_register_swap
mov x0, #3
-.Lcold_baz:
+.Lcold_register_swap:
ret
- .globl irreversible
+ .globl irreversible
.type irreversible, %function
irreversible:
# FDATA: 1 irreversible #.entry_irreversible# 10
@@ -75,17 +75,17 @@ irreversible:
# RELOC-MODE: Disassembly of section .text:
-# RELOC-MODE: <foo>:
+# RELOC-MODE: <immediate_increment>:
# RELOC-MODE-NEXT: {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
# RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b 0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE: <bar>:
+# RELOC-MODE: <immediate_decrement>:
# RELOC-MODE-NEXT: {{.*}} cblo x0, #0x1, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
# RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b 0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE: <baz>:
+# RELOC-MODE: <register_swap>:
# RELOC-MODE-NEXT: {{.*}} cbge x0, x1, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
# RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b 0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
@@ -98,15 +98,15 @@ irreversible:
# RELOC-MODE: Disassembly of section .text.cold:
-# RELOC-MODE: <foo.cold.0>:
+# RELOC-MODE: <immediate_increment.cold.0>:
# RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
# RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} ret
-# RELOC-MODE: <bar.cold.0>:
+# RELOC-MODE: <immediate_decrement.cold.0>:
# RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x2 // =2
# RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} ret
-# RELOC-MODE: <baz.cold.0>:
+# RELOC-MODE: <register_swap.cold.0>:
# RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} mov x0, #0x3 // =3
# RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} ret
@@ -117,17 +117,17 @@ irreversible:
# NON-RELOC-MODE: Disassembly of section .text:
-# NON-RELOC-MODE: <foo>:
+# NON-RELOC-MODE: <immediate_increment>:
# NON-RELOC-MODE-NEXT: {{.*}} cblt x0, #0x1, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
# NON-RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b 0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE: <bar>:
+# NON-RELOC-MODE: <immediate_decrement>:
# NON-RELOC-MODE-NEXT: {{.*}} cbhi x0, #0x0, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
# NON-RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b 0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE: <baz>:
+# NON-RELOC-MODE: <register_swap>:
# NON-RELOC-MODE-NEXT: {{.*}} cbgt x1, x0, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
# NON-RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b 0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
@@ -140,15 +140,15 @@ irreversible:
# NON-RELOC-MODE: Disassembly of section .bolt.text:
-# NON-RELOC-MODE: <foo.cold.0>:
+# NON-RELOC-MODE: <immediate_increment.cold.0>:
# NON-RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
# NON-RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} ret
-# NON-RELOC-MODE: <bar.cold.0>:
+# NON-RELOC-MODE: <immediate_decrement.cold.0>:
# NON-RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} mov x0, #0x2 // =2
# NON-RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} ret
-# NON-RELOC-MODE: <baz.cold.0>:
+# NON-RELOC-MODE: <register_swap.cold.0>:
# NON-RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} mov x0, #0x3 // =3
# NON-RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} ret
>From e70c30b021a66483efe61ffa7e0dbd16c33e8b60 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Tue, 3 Feb 2026 11:50:39 +0000
Subject: [PATCH 11/13] Add a negative test with cold target going out of range
in compact code model
---
.../AArch64/compare-and-branch-unsupported.S | 54 +++++++++++++++++++
1 file changed, 54 insertions(+)
create mode 100644 bolt/test/AArch64/compare-and-branch-unsupported.S
diff --git a/bolt/test/AArch64/compare-and-branch-unsupported.S b/bolt/test/AArch64/compare-and-branch-unsupported.S
new file mode 100644
index 0000000000000..668a45d2dd5b0
--- /dev/null
+++ b/bolt/test/AArch64/compare-and-branch-unsupported.S
@@ -0,0 +1,54 @@
+# This test checks that splitting functions which contain short range
+# conditional branches fails in compact code model due to the cold
+# target being far away, beyond the 128MB limit.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model 2>&1 \
+# RUN: | FileCheck %s --check-prefix=COMPACT-CODE-FAILS
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=CHECK-ADRP
+
+ .globl foo
+ .type foo, %function
+foo:
+# FDATA: 1 foo #.entry_foo# 10
+.entry_foo:
+ cbgt x0, #0, .Lcold_foo
+ mov x0, #1
+.Lcold_foo:
+ ret
+
+## Add enough space for the cold section to be far away,
+## beyond the 128MB limit of compact code model.
+.space 0x8000000
+
+## Force relocation mode.
+.reloc 0, R_AARCH64_NONE
+
+
+# COMPACT-CODE-FAILS: BOLT-ERROR: JITLink failed: In graph in-memory object file, section .text: relocation target {{.*}} is out of range of Branch26PCRel fixup at address {{.*}}
+
+
+# CHECK-ADRP: Disassembly of section .text:
+
+# CHECK-ADRP: <foo>:
+# CHECK-ADRP-NEXT: {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
+# CHECK-ADRP-NEXT: {{.*}} adrp x16, 0x[[ADDR1:[0-9a-f]+]]00 <foo+0x8000000>
+# CHECK-ADRP-NEXT: {{.*}} add x16, x16, #0x40
+# CHECK-ADRP-NEXT: {{.*}} br x16
+# CHECK-ADRP-NEXT: [[ADDR0]]: {{.*}} adrp x16, 0x[[ADDR1]]00 <foo+0x8000000>
+# CHECK-ADRP-NEXT: {{.*}} add x16, x16, #0x44
+# CHECK-ADRP-NEXT: {{.*}} br x16
+
+
+# CHECK-ADRP: Disassembly of section .text.cold:
+
+# CHECK-ADRP: <foo.cold.0>:
+# CHECK-ADRP-NEXT: [[ADDR1]]40: {{.*}} mov x0, #0x1 // =1
+# CHECK-ADRP-NEXT: [[ADDR1]]44: {{.*}} ret
>From 325fa5561a173fc3966458ceb9f4a1d52d502ccf Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Wed, 4 Feb 2026 14:48:49 +0000
Subject: [PATCH 12/13] Changes in this revision * Only support compact code
model, diagnose the contrary * add negative tests for the above * add tests
for function splitting where target: < 1KB, > 1KB, > 128MB * add tests
for block reordering to exercise branch inversion * add test for block
reordering causing out of bounds fixup value. * minor adjustment in function
isReversibleBranch
---
bolt/include/bolt/Core/MCPlusBuilder.h | 6 +
bolt/lib/Passes/LongJmp.cpp | 6 +
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 8 +-
.../AArch64/compare-and-branch-inversion.S | 105 +++++++++++
.../compare-and-branch-reorder-blocks.S | 49 +++++
.../AArch64/compare-and-branch-unsupported.S | 58 +++---
bolt/test/AArch64/compare-and-branch.S | 173 ++++--------------
7 files changed, 237 insertions(+), 168 deletions(-)
create mode 100644 bolt/test/AArch64/compare-and-branch-inversion.S
create mode 100644 bolt/test/AArch64/compare-and-branch-reorder-blocks.S
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index e571e91d85135..cac5fe520ca16 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1747,6 +1747,12 @@ class MCPlusBuilder {
return false;
}
+ /// AArch64 uses this to perform diagnostics in the LongJmp pass.
+ virtual bool isShortRangeBranch(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return false;
+ }
+
/// Receives a list of MCInst of the basic block to analyze and interpret the
/// terminators of this basic block. TBB must be initialized with the original
/// fall-through for this BB.
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 798e1ba08918a..f257db66347f7 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -69,6 +69,12 @@ static BinaryBasicBlock *getBBAtHotColdSplitPoint(BinaryFunction &Func) {
}
static bool mayNeedStub(const BinaryContext &BC, const MCInst &Inst) {
+ if (BC.isAArch64() && BC.MIB->isShortRangeBranch(Inst) &&
+ !opts::CompactCodeModel) {
+ BC.errs() << "Short range branch " << Inst
+ << " not supported outside compact code model\n";
+ exit(1);
+ }
return (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) &&
!BC.MIB->isIndirectBranch(Inst) && !BC.MIB->isIndirectCall(Inst);
}
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 8a414e8ceba18..24e97063e96a2 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1911,9 +1911,9 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
bool isReversibleBranch(const MCInst &Inst) const override {
if (isCompAndBranch(Inst)) {
unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
- if (needsImmDec(InvertedOpcode) && Inst.getOperand(1).getImm() <= 0)
+ if (needsImmDec(InvertedOpcode) && Inst.getOperand(1).getImm() == 0)
return false;
- if (needsImmInc(InvertedOpcode) && Inst.getOperand(1).getImm() >= 63)
+ if (needsImmInc(InvertedOpcode) && Inst.getOperand(1).getImm() == 63)
return false;
}
return MCPlusBuilder::isReversibleBranch(Inst);
@@ -2345,6 +2345,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
convertJmpToTailCall(Inst);
}
+ bool isShortRangeBranch(const MCInst &Inst) const override {
+ return isCompAndBranch(Inst);
+ }
+
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
const MCSymbol *&TBB, const MCSymbol *&FBB,
MCInst *&CondBranch,
diff --git a/bolt/test/AArch64/compare-and-branch-inversion.S b/bolt/test/AArch64/compare-and-branch-inversion.S
new file mode 100644
index 0000000000000..338d367b3cbfe
--- /dev/null
+++ b/bolt/test/AArch64/compare-and-branch-inversion.S
@@ -0,0 +1,105 @@
+# This test checks that branch inversion works when reordering blocks which
+# contain short range conditional branches. It also covers edge cases where
+# the immediate value is already the upper or lower allowed value, in which
+# case the branch cannot be inverted and the transformation bails.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s
+
+ .globl immediate_increment
+ .type immediate_increment, %function
+immediate_increment:
+.entry0:
+# FDATA: 1 immediate_increment #.entry0# 10
+ cbgt x0, #0, .exit0
+.cold0:
+# FDATA: 1 immediate_increment #.cold0# 1
+ mov x0, #1
+ ret
+.exit0:
+# FDATA: 1 immediate_increment #.exit0# 10
+ mov x0, #2
+ ret
+
+ .globl immediate_decrement
+ .type immediate_decrement, %function
+immediate_decrement:
+.entry1:
+# FDATA: 1 immediate_decrement #.entry1# 10
+ cblo x0, #1, .exit1
+.cold1:
+# FDATA: 1 immediate_decrement #.cold1# 1
+ mov x0, #1
+ ret
+.exit1:
+# FDATA: 1 immediate_decrement #.exit1# 10
+ mov x0, #2
+ ret
+
+ .globl register_swap
+ .type register_swap, %function
+register_swap:
+.entry2:
+# FDATA: 1 register_swap #.entry2# 10
+ cbge x0, x1, .exit2
+.cold2:
+# FDATA: 1 register_swap #.cold2# 1
+ mov x0, #1
+ ret
+.exit2:
+# FDATA: 1 register_swap #.exit2# 10
+ mov x0, #2
+ ret
+
+ .globl irreversible
+ .type irreversible, %function
+irreversible:
+.entry3:
+# FDATA: 1 irreversible #.entry3# 10
+ cbgt x0, #63, .exit3
+.cold3:
+# FDATA: 1 irreversible #.cold3# 1
+ mov x0, #1
+ ret
+.exit3:
+# FDATA: 1 irreversible #.exit3# 10
+ mov x0, #2
+ ret
+
+## Force relocation mode.
+.reloc 0, R_AARCH64_NONE
+
+# CHECK: Disassembly of section .text:
+
+# CHECK: <immediate_increment>:
+# CHECK-NEXT: {{.*}} cblt x0, #0x1, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT: {{.*}} mov x0, #0x2 // =2
+# CHECK-NEXT: {{.*}} ret
+# CHECK-NEXT: [[ADDR0]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT: {{.*}} ret
+
+# CHECK: <immediate_decrement>:
+# CHECK-NEXT: {{.*}} cbhi x0, #0x0, 0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT: {{.*}} mov x0, #0x2 // =2
+# CHECK-NEXT: {{.*}} ret
+# CHECK-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT: {{.*}} ret
+
+# CHECK: <register_swap>:
+# CHECK-NEXT: {{.*}} cbgt x1, x0, 0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT: {{.*}} mov x0, #0x2 // =2
+# CHECK-NEXT: {{.*}} ret
+# CHECK-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT: {{.*}} ret
+
+# CHECK: <irreversible>:
+# CHECK-NEXT: {{.*}} cbgt x0, #0x3f, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT: {{.*}} b 0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT: [[ADDR3]]: {{.*}} mov x0, #0x2 // =2
+# CHECK-NEXT: {{.*}} ret
+# CHECK-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT: {{.*}} ret
diff --git a/bolt/test/AArch64/compare-and-branch-reorder-blocks.S b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
new file mode 100644
index 0000000000000..8bd3699b97b9b
--- /dev/null
+++ b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
@@ -0,0 +1,49 @@
+# This test checks that reordering blocks which contain short range
+# conditional branches may break if the target goes out of range.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=0
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=300
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model --keep-nops 2>&1 \
+# RUN: | FileCheck %s --check-prefix=FIXUP_OUT_OF_RANGE
+
+ .globl reorder_blocks
+ .type reorder_blocks, %function
+reorder_blocks:
+.entry:
+# FDATA: 1 reorder_blocks #.entry# 10
+ cbgt x0, #0, .cold_exit
+.skip:
+# FDATA: 1 reorder_blocks #.skip# 10
+ b .hot_exit
+.cold_exit:
+# FDATA: 1 reorder_blocks #.cold_exit# 1
+ mov x0, #1
+ ret
+.hot_exit:
+# FDATA: 1 reorder_blocks #.hot_exit# 10
+ .rept NUM_NOPS
+ nop
+ .endr
+ mov x0, #2
+ ret
+
+## Force relocation mode.
+.reloc 0, R_AARCH64_NONE
+
+# CHECK: Disassembly of section .text:
+
+# CHECK: <reorder_blocks>:
+# CHECK-NEXT: {{.*}} cbgt x0, #0x0, 0x[[ADDR:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT: {{.*}} mov x0, #0x2 // =2
+# CHECK-NEXT: {{.*}} ret
+# CHECK-NEXT: [[ADDR]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT: {{.*}} ret
+
+# FIXUP_OUT_OF_RANGE: error: fixup value out of range
diff --git a/bolt/test/AArch64/compare-and-branch-unsupported.S b/bolt/test/AArch64/compare-and-branch-unsupported.S
index 668a45d2dd5b0..0bab46b43fe24 100644
--- a/bolt/test/AArch64/compare-and-branch-unsupported.S
+++ b/bolt/test/AArch64/compare-and-branch-unsupported.S
@@ -1,54 +1,48 @@
# This test checks that splitting functions which contain short range
-# conditional branches fails in compact code model due to the cold
-# target being far away, beyond the 128MB limit.
+# conditional branches does not work outside compact code model.
# REQUIRES: system-linux, asserts
-# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=0 -DRESERVE_SPACE=0
# RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model 2>&1 \
-# RUN: | FileCheck %s --check-prefix=COMPACT-CODE-FAILS
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions 2>&1 \
+# RUN: | FileCheck %s
-# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=256 -DRESERVE_SPACE=0
# RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
-# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=CHECK-ADRP
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --keep-nops 2>&1 \
+# RUN: | FileCheck %s
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=0 -DRESERVE_SPACE=1
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions 2>&1 \
+# RUN: | FileCheck %s
.globl foo
.type foo, %function
foo:
-# FDATA: 1 foo #.entry_foo# 10
.entry_foo:
+# FDATA: 1 foo #.entry_foo# 10
cbgt x0, #0, .Lcold_foo
mov x0, #1
.Lcold_foo:
ret
-## Add enough space for the cold section to be far away,
-## beyond the 128MB limit of compact code model.
+ .globl large_function
+ .type large_function, %function
+large_function:
+.entry_large_function:
+# FDATA: 1 large_function #.entry_large_function# 10
+ .rept NUM_NOPS
+ nop
+ .endr
+ ret
+
+.if RESERVE_SPACE
.space 0x8000000
+.endif
## Force relocation mode.
.reloc 0, R_AARCH64_NONE
-
-# COMPACT-CODE-FAILS: BOLT-ERROR: JITLink failed: In graph in-memory object file, section .text: relocation target {{.*}} is out of range of Branch26PCRel fixup at address {{.*}}
-
-
-# CHECK-ADRP: Disassembly of section .text:
-
-# CHECK-ADRP: <foo>:
-# CHECK-ADRP-NEXT: {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
-# CHECK-ADRP-NEXT: {{.*}} adrp x16, 0x[[ADDR1:[0-9a-f]+]]00 <foo+0x8000000>
-# CHECK-ADRP-NEXT: {{.*}} add x16, x16, #0x40
-# CHECK-ADRP-NEXT: {{.*}} br x16
-# CHECK-ADRP-NEXT: [[ADDR0]]: {{.*}} adrp x16, 0x[[ADDR1]]00 <foo+0x8000000>
-# CHECK-ADRP-NEXT: {{.*}} add x16, x16, #0x44
-# CHECK-ADRP-NEXT: {{.*}} br x16
-
-
-# CHECK-ADRP: Disassembly of section .text.cold:
-
-# CHECK-ADRP: <foo.cold.0>:
-# CHECK-ADRP-NEXT: [[ADDR1]]40: {{.*}} mov x0, #0x1 // =1
-# CHECK-ADRP-NEXT: [[ADDR1]]44: {{.*}} ret
+# CHECK: Short range branch {{.*}} not supported outside compact code model
diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
index e05f607879331..b9309a1ec69a1 100644
--- a/bolt/test/AArch64/compare-and-branch.S
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -1,157 +1,62 @@
# This test checks that splitting functions which contain short range
# conditional branches works in compact code model without relying on
-# relocations. Also checks that splitting works in non-relocation mode,
-# in order to test the branch inversion on those instructions.
+# relocations.
# REQUIRES: system-linux, asserts
-# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=1
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=0 -DRESERVE_SPACE=0
# RUN: link_fdata --no-lbr %s %t %t.fdata
# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model
-# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-RELOCS
-# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELOC-MODE
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s
-# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=0
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=256 -DRESERVE_SPACE=0
# RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
-# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-NO-RELOCS
-# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NON-RELOC-MODE
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model --keep-nops
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s
- .globl immediate_increment
- .type immediate_increment, %function
-immediate_increment:
-# FDATA: 1 immediate_increment #.entry_immediate_increment# 10
-.entry_immediate_increment:
-## Test immediate increment when inverting the branch.
- cbgt x0, #0, .Lcold_immediate_increment
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=0 -DRESERVE_SPACE=1
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model 2>&1 \
+# RUN: | FileCheck %s --check-prefix=BEYOND-128MB
+
+ .globl foo
+ .type foo, %function
+foo:
+.entry_foo:
+# FDATA: 1 foo #.entry_foo# 10
+ cbgt x0, #0, .Lcold_foo
mov x0, #1
-.Lcold_immediate_increment:
- ret
-
- .globl immediate_decrement
- .type immediate_decrement, %function
-immediate_decrement:
-# FDATA: 1 immediate_decrement #.entry_immediate_decrement# 10
-.entry_immediate_decrement:
-## Test immediate decrement when inverting the branch.
- cblo x0, #1, .Lcold_immediate_decrement
- mov x0, #2
-.Lcold_immediate_decrement:
+.Lcold_foo:
ret
- .globl register_swap
- .type register_swap, %function
-register_swap:
-# FDATA: 1 register_swap #.entry_register_swap# 10
-.entry_register_swap:
-## Test register swap when inverting the branch.
- cbge x0, x1, .Lcold_register_swap
- mov x0, #3
-.Lcold_register_swap:
+ .globl large_function
+ .type large_function, %function
+large_function:
+.entry_large_function:
+# FDATA: 1 large_function #.entry_large_function# 10
+ .rept NUM_NOPS
+ nop
+ .endr
ret
- .globl irreversible
- .type irreversible, %function
-irreversible:
-# FDATA: 1 irreversible #.entry_irreversible# 10
-.entry_irreversible:
- cbgt x0, #63, .Lcold_irreversible
- mov x0, #4
-.Lcold_irreversible:
- ret
+.if RESERVE_SPACE
+.space 0x8000000
+.endif
## Force relocation mode.
-.if RELOCATION_MODE
.reloc 0, R_AARCH64_NONE
-.endif
-
-
-# CHECK-RELOCS: R_AARCH64_NONE *ABS*
-# CHECK-RELOCS-NOT: R_AARCH64_
-
-
-# CHECK-NO-RELOCS-NOT: R_AARCH64_
-
-
-# RELOC-MODE: Disassembly of section .text:
-
-# RELOC-MODE: <immediate_increment>:
-# RELOC-MODE-NEXT: {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b 0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
-
-# RELOC-MODE: <immediate_decrement>:
-# RELOC-MODE-NEXT: {{.*}} cblo x0, #0x1, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b 0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
-
-# RELOC-MODE: <register_swap>:
-# RELOC-MODE-NEXT: {{.*}} cbge x0, x1, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b 0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
-
-# RELOC-MODE: <irreversible>:
-# RELOC-MODE-NEXT: {{.*}} cbgt x0, #0x3f, 0x[[ADDR9:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b 0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
-
-
-# RELOC-MODE: Disassembly of section .text.cold:
-
-# RELOC-MODE: <immediate_increment.cold.0>:
-# RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
-# RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} ret
-
-# RELOC-MODE: <immediate_decrement.cold.0>:
-# RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x2 // =2
-# RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} ret
-
-# RELOC-MODE: <register_swap.cold.0>:
-# RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} mov x0, #0x3 // =3
-# RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} ret
-
-# RELOC-MODE: <irreversible.cold.0>:
-# RELOC-MODE-NEXT: [[ADDR10]]: {{.*}} mov x0, #0x4 // =4
-# RELOC-MODE-NEXT: [[ADDR11]]: {{.*}} ret
-
-
-# NON-RELOC-MODE: Disassembly of section .text:
-
-# NON-RELOC-MODE: <immediate_increment>:
-# NON-RELOC-MODE-NEXT: {{.*}} cblt x0, #0x1, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b 0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
-
-# NON-RELOC-MODE: <immediate_decrement>:
-# NON-RELOC-MODE-NEXT: {{.*}} cbhi x0, #0x0, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b 0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
-
-# NON-RELOC-MODE: <register_swap>:
-# NON-RELOC-MODE-NEXT: {{.*}} cbgt x1, x0, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b 0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
-
-# NON-RELOC-MODE: <irreversible>:
-# NON-RELOC-MODE-NEXT: {{.*}} cbgt x0, #0x3f, 0x[[ADDR9:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: {{.*}} b 0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b 0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
-
-# NON-RELOC-MODE: Disassembly of section .bolt.text:
+# CHECK: Disassembly of section .text:
-# NON-RELOC-MODE: <immediate_increment.cold.0>:
-# NON-RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
-# NON-RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} ret
+# CHECK: <foo>:
+# CHECK-NEXT: {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT: {{.*}} b 0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT: [[ADDR0]]: {{.*}} b 0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE: <immediate_decrement.cold.0>:
-# NON-RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} mov x0, #0x2 // =2
-# NON-RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} ret
+# CHECK: Disassembly of section .text.cold:
-# NON-RELOC-MODE: <register_swap.cold.0>:
-# NON-RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} mov x0, #0x3 // =3
-# NON-RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} ret
+# CHECK: <foo.cold.0>:
+# CHECK-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT: [[ADDR2]]: {{.*}} ret
-# NON-RELOC-MODE: <irreversible.cold.0>:
-# NON-RELOC-MODE-NEXT: [[ADDR10]]: {{.*}} mov x0, #0x4 // =4
-# NON-RELOC-MODE-NEXT: [[ADDR11]]: {{.*}} ret
+# BEYOND-128MB: BOLT-ERROR: JITLink failed: In graph in-memory object file, section .text: relocation target {{0x[0-9a-f]+}} (<anonymous symbol>) is out of range of Branch26PCRel fixup at address {{0x[0-9a-f]+}}
>From 1a5373a88f30acc2246d8e97c1a7e510449a5ee1 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Wed, 4 Feb 2026 15:14:39 +0000
Subject: [PATCH 13/13] Change NUM_NOPS from 300 to 256 in test script
---
bolt/test/AArch64/compare-and-branch-reorder-blocks.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bolt/test/AArch64/compare-and-branch-reorder-blocks.S b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
index 8bd3699b97b9b..10ec75d18dd96 100644
--- a/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
+++ b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
@@ -8,7 +8,7 @@
# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model
# RUN: llvm-objdump -d %t.bolt | FileCheck %s
-# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=300
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=256
# RUN: link_fdata --no-lbr %s %t %t.fdata
# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model --keep-nops 2>&1 \
# RUN: | FileCheck %s --check-prefix=FIXUP_OUT_OF_RANGE
More information about the llvm-commits mailing list