[llvm] [BOLT][AArch64] Support FEAT_CMPBR branch instructions. (PR #174972)

Alexandros Lamprineas via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 4 07:14:57 PST 2026


https://github.com/labrinea updated https://github.com/llvm/llvm-project/pull/174972

>From 9ae12a45f96d2e2fac8439dfb838331492a41336 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Fri, 14 Nov 2025 11:43:47 +0000
Subject: [PATCH 01/13] [BOLT][AArch64] Support FEAT_CMPBR branch instructions.

The Armv9.6-A compare-and-branch instructions use a short-range 9-bit
immediate value. They do not have a corresponding relocation type in
the ABI. That means they can only be used either in the compact code
model or in non-relocation mode. With this patch I am adding support
for them in BOLT.
---
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 155 ++++++++++++++++--
 bolt/test/AArch64/compare-and-branch.S        | 127 ++++++++++++++
 2 files changed, 271 insertions(+), 11 deletions(-)
 create mode 100644 bolt/test/AArch64/compare-and-branch.S

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 03fb4ddc2f238..1d9e4cbce118a 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -716,6 +716,51 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     return Insts;
   }
 
+  bool isCompAndBranch(const MCInst &Inst) const {
+    const unsigned opcode = Inst.getOpcode();
+    switch (opcode) {
+    case AArch64::CBBEQWrr:
+    case AArch64::CBBGEWrr:
+    case AArch64::CBBGTWrr:
+    case AArch64::CBBHIWrr:
+    case AArch64::CBBHSWrr:
+    case AArch64::CBBNEWrr:
+    case AArch64::CBHEQWrr:
+    case AArch64::CBHGEWrr:
+    case AArch64::CBHGTWrr:
+    case AArch64::CBHHIWrr:
+    case AArch64::CBHHSWrr:
+    case AArch64::CBHNEWrr:
+    case AArch64::CBHIWrr:
+    case AArch64::CBHIXrr:
+    case AArch64::CBHSWrr:
+    case AArch64::CBHSXrr:
+    case AArch64::CBNEWrr:
+    case AArch64::CBNEXrr:
+    case AArch64::CBEQWrr:
+    case AArch64::CBEQXrr:
+    case AArch64::CBGEWrr:
+    case AArch64::CBGEXrr:
+    case AArch64::CBGTWrr:
+    case AArch64::CBGTXrr:
+    case AArch64::CBEQWri:
+    case AArch64::CBEQXri:
+    case AArch64::CBGTWri:
+    case AArch64::CBGTXri:
+    case AArch64::CBHIWri:
+    case AArch64::CBHIXri:
+    case AArch64::CBLOWri:
+    case AArch64::CBLOXri:
+    case AArch64::CBLTWri:
+    case AArch64::CBLTXri:
+    case AArch64::CBNEWri:
+    case AArch64::CBNEXri:
+      return true;
+    default:
+      return false;
+    }
+  }
+
   bool isTB(const MCInst &Inst) const {
     return (Inst.getOpcode() == AArch64::TBNZW ||
             Inst.getOpcode() == AArch64::TBNZX ||
@@ -1260,7 +1305,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       if (isConditionalBranch(Inst) || isADR(Inst) || isADRP(Inst) ||
           isMOVW(Inst))
         OpNum = 1;
-      if (isTB(Inst) || isAddXri(Inst))
+      if (isTB(Inst) || isAddXri(Inst) || isCompAndBranch(Inst))
         OpNum = 2;
     }
 
@@ -1329,7 +1374,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       ++OI;
     }
 
-    if (isTB(Inst)) {
+    if (isTB(Inst) || isCompAndBranch(Inst)) {
       assert(MCPlus::getNumPrimeOperands(Inst) >= 3 &&
              "Invalid number of operands");
       OI = Inst.begin() + 2;
@@ -1682,6 +1727,42 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     case AArch64::CBZX:     return AArch64::CBNZX;
     case AArch64::CBNZW:    return AArch64::CBZW;
     case AArch64::CBNZX:    return AArch64::CBZX;
+    case AArch64::CBGTWrr:  return AArch64::CBGEWrr;
+    case AArch64::CBGTXrr:  return AArch64::CBGEXrr;
+    case AArch64::CBGEWrr:  return AArch64::CBGTWrr;
+    case AArch64::CBGEXrr:  return AArch64::CBGTXrr;
+    case AArch64::CBHIWrr:  return AArch64::CBHSWrr;
+    case AArch64::CBHIXrr:  return AArch64::CBHSXrr;
+    case AArch64::CBHSWrr:  return AArch64::CBHIWrr;
+    case AArch64::CBHSXrr:  return AArch64::CBHIXrr;
+    case AArch64::CBEQWrr:  return AArch64::CBNEWrr;
+    case AArch64::CBEQXrr:  return AArch64::CBNEXrr;
+    case AArch64::CBNEWrr:  return AArch64::CBEQWrr;
+    case AArch64::CBNEXrr:  return AArch64::CBEQXrr;
+    case AArch64::CBHGTWrr: return AArch64::CBHGEWrr;
+    case AArch64::CBHGEWrr: return AArch64::CBHGTWrr;
+    case AArch64::CBHHIWrr: return AArch64::CBHHSWrr;
+    case AArch64::CBHHSWrr: return AArch64::CBHHIWrr;
+    case AArch64::CBHEQWrr: return AArch64::CBHNEWrr;
+    case AArch64::CBHNEWrr: return AArch64::CBHEQWrr;
+    case AArch64::CBBGTWrr: return AArch64::CBBGEWrr;
+    case AArch64::CBBGEWrr: return AArch64::CBBGTWrr;
+    case AArch64::CBBHIWrr: return AArch64::CBBHSWrr;
+    case AArch64::CBBHSWrr: return AArch64::CBBHIWrr;
+    case AArch64::CBBEQWrr: return AArch64::CBBNEWrr;
+    case AArch64::CBBNEWrr: return AArch64::CBBEQWrr;
+    case AArch64::CBGTWri:  return AArch64::CBLTWri;
+    case AArch64::CBGTXri:  return AArch64::CBLTXri;
+    case AArch64::CBLTWri:  return AArch64::CBGTWri;
+    case AArch64::CBLTXri:  return AArch64::CBGTXri;
+    case AArch64::CBHIWri:  return AArch64::CBLOWri;
+    case AArch64::CBHIXri:  return AArch64::CBLOXri;
+    case AArch64::CBLOWri:  return AArch64::CBHIWri;
+    case AArch64::CBLOXri:  return AArch64::CBHIXri;
+    case AArch64::CBEQWri:  return AArch64::CBNEWri;
+    case AArch64::CBEQXri:  return AArch64::CBNEXri;
+    case AArch64::CBNEWri:  return AArch64::CBEQWri;
+    case AArch64::CBNEXri:  return AArch64::CBEQXri;
     }
   }
 
@@ -1702,9 +1783,63 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
   }
 
+  bool needsRegSwap(unsigned Opcode) const {
+    switch (Opcode) {
+    default:
+      return false;
+    case AArch64::CBGTWrr:
+    case AArch64::CBGTXrr:
+    case AArch64::CBGEWrr:
+    case AArch64::CBGEXrr:
+    case AArch64::CBHIWrr:
+    case AArch64::CBHIXrr:
+    case AArch64::CBHSWrr:
+    case AArch64::CBHSXrr:
+    case AArch64::CBHGTWrr:
+    case AArch64::CBHGEWrr:
+    case AArch64::CBHHIWrr:
+    case AArch64::CBHHSWrr:
+    case AArch64::CBBGTWrr:
+    case AArch64::CBBGEWrr:
+    case AArch64::CBBHIWrr:
+    case AArch64::CBBHSWrr:
+      return true;
+    }
+  }
+
+  bool needsImmDec(unsigned Opcode) const {
+    switch (Opcode) {
+    default:
+      return false;
+    case AArch64::CBLTWri:
+    case AArch64::CBLTXri:
+    case AArch64::CBLOWri:
+    case AArch64::CBLOXri:
+      return true;
+    }
+  }
+
+  bool needsImmInc(unsigned Opcode) const {
+    switch (Opcode) {
+    default:
+      return false;
+    case AArch64::CBGTWri:
+    case AArch64::CBGTXri:
+    case AArch64::CBHIWri:
+    case AArch64::CBHIXri:
+      return true;
+    }
+  }
+
   void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
                               MCContext *Ctx) const override {
-    if (isTB(Inst) || isCB(Inst)) {
+    if (isTB(Inst) || isCB(Inst) || isCompAndBranch(Inst)) {
+      if (needsRegSwap(Inst.getOpcode()))
+        std::swap(Inst.getOperand(0), Inst.getOperand(1));
+      else if (needsImmDec(Inst.getOpcode()))
+        Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() - 1);
+      else if (needsImmInc(Inst.getOpcode()))
+        Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() + 1);
       Inst.setOpcode(getInvertedBranchOpcode(Inst.getOpcode()));
       assert(Inst.getOpcode() != 0 && "Invalid branch instruction");
     } else if (Inst.getOpcode() == AArch64::Bcc) {
@@ -1721,18 +1856,16 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   }
 
   int getPCRelEncodingSize(const MCInst &Inst) const override {
+    if (isCompAndBranch(Inst))
+      return 11;
+    if (isTB(Inst))
+      return 16;
+    if (isCB(Inst))
+      return 21;
     switch (Inst.getOpcode()) {
     default:
       llvm_unreachable("Failed to get pcrel encoding size");
       return 0;
-    case AArch64::TBZW:     return 16;
-    case AArch64::TBZX:     return 16;
-    case AArch64::TBNZW:    return 16;
-    case AArch64::TBNZX:    return 16;
-    case AArch64::CBZW:     return 21;
-    case AArch64::CBZX:     return 21;
-    case AArch64::CBNZW:    return 21;
-    case AArch64::CBNZX:    return 21;
     case AArch64::B:        return 28;
     case AArch64::BL:       return 28;
     case AArch64::Bcc:      return 21;
diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
new file mode 100644
index 0000000000000..374541c9bd6a0
--- /dev/null
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -0,0 +1,127 @@
+# This test checks that splitting functions which contain short range
+# conditional branches works in compact code model without relying on
+# relocations. Also checks that splitting works in non-relocation mode,
+# in order to test the branch inversion on those instructions.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=1
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model
+# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-RELOCS
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELOC-MODE
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=0
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
+# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-NO-RELOCS
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NON-RELOC-MODE
+
+  .globl  foo
+  .type foo, %function
+foo:
+# FDATA: 1 foo #.entry_foo# 10
+.entry_foo:
+## Test immediate increment when inverting the branch.
+    cbgt x0, #0, .Lcold_foo
+    mov x0, #1
+.Lcold_foo:
+    ret
+
+  .globl  bar
+  .type bar, %function
+bar:
+# FDATA: 1 bar #.entry_bar# 10
+.entry_bar:
+## Test immediate decrement when inverting the branch.
+    cblo x0, #1, .Lcold_bar
+    mov x0, #2
+.Lcold_bar:
+    ret
+
+  .globl  baz
+  .type baz, %function
+baz:
+# FDATA: 1 baz #.entry_baz# 10
+.entry_baz:
+## Test register swap when inverting the branch.
+    cbge x0, x1, .Lcold_baz
+    mov x0, #3
+.Lcold_baz:
+    ret
+
+## Force relocation mode.
+.if RELOCATION_MODE
+.reloc 0, R_AARCH64_NONE
+.endif
+
+
+# CHECK-RELOCS: R_AARCH64_NONE *ABS*
+# CHECK-RELOCS-NOT: R_AARCH64_
+
+
+# CHECK-NO-RELOCS-NOT: R_AARCH64_
+
+
+# RELOC-MODE: Disassembly of section .text:
+
+# RELOC-MODE: <foo>:
+# RELOC-MODE-NEXT:            {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b              0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
+
+# RELOC-MODE: <bar>:
+# RELOC-MODE-NEXT:            {{.*}} cblo x0, #0x1, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b              0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
+
+# RELOC-MODE: <baz>:
+# RELOC-MODE-NEXT:            {{.*}} cbge x0, x1, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT:            {{.*}} b            0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b            0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
+
+# RELOC-MODE: Disassembly of section .text.cold:
+
+# RELOC-MODE: <foo.cold.0>:
+# RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
+# RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} ret
+
+# RELOC-MODE: <bar.cold.0>:
+# RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x2 // =2
+# RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} ret
+
+# RELOC-MODE: <baz.cold.0>:
+# RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} mov x0, #0x3 // =3
+# RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} ret
+
+
+# NON-RELOC-MODE: Disassembly of section .text:
+
+# NON-RELOC-MODE: <foo>:
+# NON-RELOC-MODE-NEXT:            {{.*}} cblt x0, #0x1, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b              0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
+
+# NON-RELOC-MODE: <bar>:
+# NON-RELOC-MODE-NEXT:            {{.*}} cbhi x0, #0x0, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b              0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
+
+# NON-RELOC-MODE: <baz>:
+# NON-RELOC-MODE-NEXT:            {{.*}} cbgt x1, x0, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT:            {{.*}} b            0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b            0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
+
+# NON-RELOC-MODE: Disassembly of section .bolt.text:
+
+# NON-RELOC-MODE: <foo.cold.0>:
+# NON-RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
+# NON-RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} ret
+
+# NON-RELOC-MODE: <bar.cold.0>:
+# NON-RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} mov x0, #0x2 // =2
+# NON-RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} ret
+
+# NON-RELOC-MODE: <baz.cold.0>:
+# NON-RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} mov x0, #0x3 // =3
+# NON-RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} ret

>From be76581e2d18a72032eee44eecb8813eba06d3c0 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Thu, 8 Jan 2026 13:10:56 +0000
Subject: [PATCH 02/13] clang format

---
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 108 ++++++++++++------
 1 file changed, 72 insertions(+), 36 deletions(-)

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 1d9e4cbce118a..eeee48d11f067 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1727,42 +1727,78 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     case AArch64::CBZX:     return AArch64::CBNZX;
     case AArch64::CBNZW:    return AArch64::CBZW;
     case AArch64::CBNZX:    return AArch64::CBZX;
-    case AArch64::CBGTWrr:  return AArch64::CBGEWrr;
-    case AArch64::CBGTXrr:  return AArch64::CBGEXrr;
-    case AArch64::CBGEWrr:  return AArch64::CBGTWrr;
-    case AArch64::CBGEXrr:  return AArch64::CBGTXrr;
-    case AArch64::CBHIWrr:  return AArch64::CBHSWrr;
-    case AArch64::CBHIXrr:  return AArch64::CBHSXrr;
-    case AArch64::CBHSWrr:  return AArch64::CBHIWrr;
-    case AArch64::CBHSXrr:  return AArch64::CBHIXrr;
-    case AArch64::CBEQWrr:  return AArch64::CBNEWrr;
-    case AArch64::CBEQXrr:  return AArch64::CBNEXrr;
-    case AArch64::CBNEWrr:  return AArch64::CBEQWrr;
-    case AArch64::CBNEXrr:  return AArch64::CBEQXrr;
-    case AArch64::CBHGTWrr: return AArch64::CBHGEWrr;
-    case AArch64::CBHGEWrr: return AArch64::CBHGTWrr;
-    case AArch64::CBHHIWrr: return AArch64::CBHHSWrr;
-    case AArch64::CBHHSWrr: return AArch64::CBHHIWrr;
-    case AArch64::CBHEQWrr: return AArch64::CBHNEWrr;
-    case AArch64::CBHNEWrr: return AArch64::CBHEQWrr;
-    case AArch64::CBBGTWrr: return AArch64::CBBGEWrr;
-    case AArch64::CBBGEWrr: return AArch64::CBBGTWrr;
-    case AArch64::CBBHIWrr: return AArch64::CBBHSWrr;
-    case AArch64::CBBHSWrr: return AArch64::CBBHIWrr;
-    case AArch64::CBBEQWrr: return AArch64::CBBNEWrr;
-    case AArch64::CBBNEWrr: return AArch64::CBBEQWrr;
-    case AArch64::CBGTWri:  return AArch64::CBLTWri;
-    case AArch64::CBGTXri:  return AArch64::CBLTXri;
-    case AArch64::CBLTWri:  return AArch64::CBGTWri;
-    case AArch64::CBLTXri:  return AArch64::CBGTXri;
-    case AArch64::CBHIWri:  return AArch64::CBLOWri;
-    case AArch64::CBHIXri:  return AArch64::CBLOXri;
-    case AArch64::CBLOWri:  return AArch64::CBHIWri;
-    case AArch64::CBLOXri:  return AArch64::CBHIXri;
-    case AArch64::CBEQWri:  return AArch64::CBNEWri;
-    case AArch64::CBEQXri:  return AArch64::CBNEXri;
-    case AArch64::CBNEWri:  return AArch64::CBEQWri;
-    case AArch64::CBNEXri:  return AArch64::CBEQXri;
+    case AArch64::CBGTWrr:
+      return AArch64::CBGEWrr;
+    case AArch64::CBGTXrr:
+      return AArch64::CBGEXrr;
+    case AArch64::CBGEWrr:
+      return AArch64::CBGTWrr;
+    case AArch64::CBGEXrr:
+      return AArch64::CBGTXrr;
+    case AArch64::CBHIWrr:
+      return AArch64::CBHSWrr;
+    case AArch64::CBHIXrr:
+      return AArch64::CBHSXrr;
+    case AArch64::CBHSWrr:
+      return AArch64::CBHIWrr;
+    case AArch64::CBHSXrr:
+      return AArch64::CBHIXrr;
+    case AArch64::CBEQWrr:
+      return AArch64::CBNEWrr;
+    case AArch64::CBEQXrr:
+      return AArch64::CBNEXrr;
+    case AArch64::CBNEWrr:
+      return AArch64::CBEQWrr;
+    case AArch64::CBNEXrr:
+      return AArch64::CBEQXrr;
+    case AArch64::CBHGTWrr:
+      return AArch64::CBHGEWrr;
+    case AArch64::CBHGEWrr:
+      return AArch64::CBHGTWrr;
+    case AArch64::CBHHIWrr:
+      return AArch64::CBHHSWrr;
+    case AArch64::CBHHSWrr:
+      return AArch64::CBHHIWrr;
+    case AArch64::CBHEQWrr:
+      return AArch64::CBHNEWrr;
+    case AArch64::CBHNEWrr:
+      return AArch64::CBHEQWrr;
+    case AArch64::CBBGTWrr:
+      return AArch64::CBBGEWrr;
+    case AArch64::CBBGEWrr:
+      return AArch64::CBBGTWrr;
+    case AArch64::CBBHIWrr:
+      return AArch64::CBBHSWrr;
+    case AArch64::CBBHSWrr:
+      return AArch64::CBBHIWrr;
+    case AArch64::CBBEQWrr:
+      return AArch64::CBBNEWrr;
+    case AArch64::CBBNEWrr:
+      return AArch64::CBBEQWrr;
+    case AArch64::CBGTWri:
+      return AArch64::CBLTWri;
+    case AArch64::CBGTXri:
+      return AArch64::CBLTXri;
+    case AArch64::CBLTWri:
+      return AArch64::CBGTWri;
+    case AArch64::CBLTXri:
+      return AArch64::CBGTXri;
+    case AArch64::CBHIWri:
+      return AArch64::CBLOWri;
+    case AArch64::CBHIXri:
+      return AArch64::CBLOXri;
+    case AArch64::CBLOWri:
+      return AArch64::CBHIWri;
+    case AArch64::CBLOXri:
+      return AArch64::CBHIXri;
+    case AArch64::CBEQWri:
+      return AArch64::CBNEWri;
+    case AArch64::CBEQXri:
+      return AArch64::CBNEXri;
+    case AArch64::CBNEWri:
+      return AArch64::CBEQWri;
+    case AArch64::CBNEXri:
+      return AArch64::CBEQXri;
     }
   }
 

>From 4d1f1d88b5994a610b0a26e854f61253ccff9278 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Mon, 12 Jan 2026 13:09:56 +0000
Subject: [PATCH 03/13] Revert "clang format" and escape formatting switch
 cases.

This reverts commit be76581e2d18a72032eee44eecb8813eba06d3c0.
---
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 112 ++++++------------
 1 file changed, 39 insertions(+), 73 deletions(-)

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index eeee48d11f067..6617b45af6334 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1715,6 +1715,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   }
 
   unsigned getInvertedBranchOpcode(unsigned Opcode) const {
+    // clang-format off
     switch (Opcode) {
     default:
       llvm_unreachable("Failed to invert branch opcode");
@@ -1727,79 +1728,44 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     case AArch64::CBZX:     return AArch64::CBNZX;
     case AArch64::CBNZW:    return AArch64::CBZW;
     case AArch64::CBNZX:    return AArch64::CBZX;
-    case AArch64::CBGTWrr:
-      return AArch64::CBGEWrr;
-    case AArch64::CBGTXrr:
-      return AArch64::CBGEXrr;
-    case AArch64::CBGEWrr:
-      return AArch64::CBGTWrr;
-    case AArch64::CBGEXrr:
-      return AArch64::CBGTXrr;
-    case AArch64::CBHIWrr:
-      return AArch64::CBHSWrr;
-    case AArch64::CBHIXrr:
-      return AArch64::CBHSXrr;
-    case AArch64::CBHSWrr:
-      return AArch64::CBHIWrr;
-    case AArch64::CBHSXrr:
-      return AArch64::CBHIXrr;
-    case AArch64::CBEQWrr:
-      return AArch64::CBNEWrr;
-    case AArch64::CBEQXrr:
-      return AArch64::CBNEXrr;
-    case AArch64::CBNEWrr:
-      return AArch64::CBEQWrr;
-    case AArch64::CBNEXrr:
-      return AArch64::CBEQXrr;
-    case AArch64::CBHGTWrr:
-      return AArch64::CBHGEWrr;
-    case AArch64::CBHGEWrr:
-      return AArch64::CBHGTWrr;
-    case AArch64::CBHHIWrr:
-      return AArch64::CBHHSWrr;
-    case AArch64::CBHHSWrr:
-      return AArch64::CBHHIWrr;
-    case AArch64::CBHEQWrr:
-      return AArch64::CBHNEWrr;
-    case AArch64::CBHNEWrr:
-      return AArch64::CBHEQWrr;
-    case AArch64::CBBGTWrr:
-      return AArch64::CBBGEWrr;
-    case AArch64::CBBGEWrr:
-      return AArch64::CBBGTWrr;
-    case AArch64::CBBHIWrr:
-      return AArch64::CBBHSWrr;
-    case AArch64::CBBHSWrr:
-      return AArch64::CBBHIWrr;
-    case AArch64::CBBEQWrr:
-      return AArch64::CBBNEWrr;
-    case AArch64::CBBNEWrr:
-      return AArch64::CBBEQWrr;
-    case AArch64::CBGTWri:
-      return AArch64::CBLTWri;
-    case AArch64::CBGTXri:
-      return AArch64::CBLTXri;
-    case AArch64::CBLTWri:
-      return AArch64::CBGTWri;
-    case AArch64::CBLTXri:
-      return AArch64::CBGTXri;
-    case AArch64::CBHIWri:
-      return AArch64::CBLOWri;
-    case AArch64::CBHIXri:
-      return AArch64::CBLOXri;
-    case AArch64::CBLOWri:
-      return AArch64::CBHIWri;
-    case AArch64::CBLOXri:
-      return AArch64::CBHIXri;
-    case AArch64::CBEQWri:
-      return AArch64::CBNEWri;
-    case AArch64::CBEQXri:
-      return AArch64::CBNEXri;
-    case AArch64::CBNEWri:
-      return AArch64::CBEQWri;
-    case AArch64::CBNEXri:
-      return AArch64::CBEQXri;
-    }
+    case AArch64::CBGTWrr:  return AArch64::CBGEWrr;
+    case AArch64::CBGTXrr:  return AArch64::CBGEXrr;
+    case AArch64::CBGEWrr:  return AArch64::CBGTWrr;
+    case AArch64::CBGEXrr:  return AArch64::CBGTXrr;
+    case AArch64::CBHIWrr:  return AArch64::CBHSWrr;
+    case AArch64::CBHIXrr:  return AArch64::CBHSXrr;
+    case AArch64::CBHSWrr:  return AArch64::CBHIWrr;
+    case AArch64::CBHSXrr:  return AArch64::CBHIXrr;
+    case AArch64::CBEQWrr:  return AArch64::CBNEWrr;
+    case AArch64::CBEQXrr:  return AArch64::CBNEXrr;
+    case AArch64::CBNEWrr:  return AArch64::CBEQWrr;
+    case AArch64::CBNEXrr:  return AArch64::CBEQXrr;
+    case AArch64::CBHGTWrr: return AArch64::CBHGEWrr;
+    case AArch64::CBHGEWrr: return AArch64::CBHGTWrr;
+    case AArch64::CBHHIWrr: return AArch64::CBHHSWrr;
+    case AArch64::CBHHSWrr: return AArch64::CBHHIWrr;
+    case AArch64::CBHEQWrr: return AArch64::CBHNEWrr;
+    case AArch64::CBHNEWrr: return AArch64::CBHEQWrr;
+    case AArch64::CBBGTWrr: return AArch64::CBBGEWrr;
+    case AArch64::CBBGEWrr: return AArch64::CBBGTWrr;
+    case AArch64::CBBHIWrr: return AArch64::CBBHSWrr;
+    case AArch64::CBBHSWrr: return AArch64::CBBHIWrr;
+    case AArch64::CBBEQWrr: return AArch64::CBBNEWrr;
+    case AArch64::CBBNEWrr: return AArch64::CBBEQWrr;
+    case AArch64::CBGTWri:  return AArch64::CBLTWri;
+    case AArch64::CBGTXri:  return AArch64::CBLTXri;
+    case AArch64::CBLTWri:  return AArch64::CBGTWri;
+    case AArch64::CBLTXri:  return AArch64::CBGTXri;
+    case AArch64::CBHIWri:  return AArch64::CBLOWri;
+    case AArch64::CBHIXri:  return AArch64::CBLOXri;
+    case AArch64::CBLOWri:  return AArch64::CBHIWri;
+    case AArch64::CBLOXri:  return AArch64::CBHIXri;
+    case AArch64::CBEQWri:  return AArch64::CBNEWri;
+    case AArch64::CBEQXri:  return AArch64::CBNEXri;
+    case AArch64::CBNEWri:  return AArch64::CBEQWri;
+    case AArch64::CBNEXri:  return AArch64::CBEQXri;
+    }
+    // clang-format on
   }
 
   unsigned getCondCode(const MCInst &Inst) const override {

>From 2a8a0a2a27b81726e6c926540fdbd6197195cc24 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Thu, 15 Jan 2026 17:53:43 +0000
Subject: [PATCH 04/13] Fix inverted branch opcodes.

---
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 175 ++++++++++--------
 1 file changed, 98 insertions(+), 77 deletions(-)

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 6617b45af6334..f20c0a4a62bf9 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -717,44 +717,48 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   }
 
   bool isCompAndBranch(const MCInst &Inst) const {
-    const unsigned opcode = Inst.getOpcode();
-    switch (opcode) {
-    case AArch64::CBBEQWrr:
-    case AArch64::CBBGEWrr:
-    case AArch64::CBBGTWrr:
-    case AArch64::CBBHIWrr:
-    case AArch64::CBBHSWrr:
-    case AArch64::CBBNEWrr:
-    case AArch64::CBHEQWrr:
-    case AArch64::CBHGEWrr:
-    case AArch64::CBHGTWrr:
-    case AArch64::CBHHIWrr:
-    case AArch64::CBHHSWrr:
-    case AArch64::CBHNEWrr:
-    case AArch64::CBHIWrr:
-    case AArch64::CBHIXrr:
-    case AArch64::CBHSWrr:
-    case AArch64::CBHSXrr:
-    case AArch64::CBNEWrr:
-    case AArch64::CBNEXrr:
-    case AArch64::CBEQWrr:
-    case AArch64::CBEQXrr:
-    case AArch64::CBGEWrr:
-    case AArch64::CBGEXrr:
-    case AArch64::CBGTWrr:
-    case AArch64::CBGTXrr:
-    case AArch64::CBEQWri:
-    case AArch64::CBEQXri:
+    const unsigned Opcode = Inst.getOpcode();
+    switch (Opcode) {
+    // Compare register with immediate and branch.
     case AArch64::CBGTWri:
     case AArch64::CBGTXri:
+    case AArch64::CBLTWri:
+    case AArch64::CBLTXri:
     case AArch64::CBHIWri:
     case AArch64::CBHIXri:
     case AArch64::CBLOWri:
     case AArch64::CBLOXri:
-    case AArch64::CBLTWri:
-    case AArch64::CBLTXri:
+    case AArch64::CBEQWri:
+    case AArch64::CBEQXri:
     case AArch64::CBNEWri:
     case AArch64::CBNEXri:
+    // Compare registers and branch.
+    case AArch64::CBGTWrr:
+    case AArch64::CBGTXrr:
+    case AArch64::CBGEWrr:
+    case AArch64::CBGEXrr:
+    case AArch64::CBHIWrr:
+    case AArch64::CBHIXrr:
+    case AArch64::CBHSWrr:
+    case AArch64::CBHSXrr:
+    case AArch64::CBEQWrr:
+    case AArch64::CBEQXrr:
+    case AArch64::CBNEWrr:
+    case AArch64::CBNEXrr:
+    // Compare bytes and branch.
+    case AArch64::CBBGTWrr:
+    case AArch64::CBBGEWrr:
+    case AArch64::CBBHIWrr:
+    case AArch64::CBBHSWrr:
+    case AArch64::CBBEQWrr:
+    case AArch64::CBBNEWrr:
+    // Compare halfwords and branch.
+    case AArch64::CBHGTWrr:
+    case AArch64::CBHGEWrr:
+    case AArch64::CBHHIWrr:
+    case AArch64::CBHHSWrr:
+    case AArch64::CBHEQWrr:
+    case AArch64::CBHNEWrr:
       return true;
     default:
       return false;
@@ -1728,42 +1732,46 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     case AArch64::CBZX:     return AArch64::CBNZX;
     case AArch64::CBNZW:    return AArch64::CBZW;
     case AArch64::CBNZX:    return AArch64::CBZX;
-    case AArch64::CBGTWrr:  return AArch64::CBGEWrr;
-    case AArch64::CBGTXrr:  return AArch64::CBGEXrr;
-    case AArch64::CBGEWrr:  return AArch64::CBGTWrr;
-    case AArch64::CBGEXrr:  return AArch64::CBGTXrr;
-    case AArch64::CBHIWrr:  return AArch64::CBHSWrr;
-    case AArch64::CBHIXrr:  return AArch64::CBHSXrr;
-    case AArch64::CBHSWrr:  return AArch64::CBHIWrr;
-    case AArch64::CBHSXrr:  return AArch64::CBHIXrr;
+    // Compare register with immediate and branch.
+    case AArch64::CBGTWri:  return AArch64::CBLTWri; // +1
+    case AArch64::CBGTXri:  return AArch64::CBLTXri; // +1
+    case AArch64::CBLTWri:  return AArch64::CBGTWri; // -1
+    case AArch64::CBLTXri:  return AArch64::CBGTXri; // -1
+    case AArch64::CBHIWri:  return AArch64::CBLOWri; // +1
+    case AArch64::CBHIXri:  return AArch64::CBLOXri; // +1
+    case AArch64::CBLOWri:  return AArch64::CBHIWri; // -1
+    case AArch64::CBLOXri:  return AArch64::CBHIXri; // -1
+    case AArch64::CBEQWri:  return AArch64::CBNEWri;
+    case AArch64::CBEQXri:  return AArch64::CBNEXri;
+    case AArch64::CBNEWri:  return AArch64::CBEQWri;
+    case AArch64::CBNEXri:  return AArch64::CBEQXri;
+    // Compare registers and branch.
+    case AArch64::CBGTWrr:  return AArch64::CBGEWrr; // swap
+    case AArch64::CBGTXrr:  return AArch64::CBGEXrr; // swap
+    case AArch64::CBGEWrr:  return AArch64::CBGTWrr; // swap
+    case AArch64::CBGEXrr:  return AArch64::CBGTXrr; // swap
+    case AArch64::CBHIWrr:  return AArch64::CBHSWrr; // swap
+    case AArch64::CBHIXrr:  return AArch64::CBHSXrr; // swap
+    case AArch64::CBHSWrr:  return AArch64::CBHIWrr; // swap
+    case AArch64::CBHSXrr:  return AArch64::CBHIXrr; // swap
     case AArch64::CBEQWrr:  return AArch64::CBNEWrr;
     case AArch64::CBEQXrr:  return AArch64::CBNEXrr;
     case AArch64::CBNEWrr:  return AArch64::CBEQWrr;
     case AArch64::CBNEXrr:  return AArch64::CBEQXrr;
-    case AArch64::CBHGTWrr: return AArch64::CBHGEWrr;
-    case AArch64::CBHGEWrr: return AArch64::CBHGTWrr;
-    case AArch64::CBHHIWrr: return AArch64::CBHHSWrr;
-    case AArch64::CBHHSWrr: return AArch64::CBHHIWrr;
-    case AArch64::CBHEQWrr: return AArch64::CBHNEWrr;
-    case AArch64::CBHNEWrr: return AArch64::CBHEQWrr;
-    case AArch64::CBBGTWrr: return AArch64::CBBGEWrr;
-    case AArch64::CBBGEWrr: return AArch64::CBBGTWrr;
-    case AArch64::CBBHIWrr: return AArch64::CBBHSWrr;
-    case AArch64::CBBHSWrr: return AArch64::CBBHIWrr;
+    // Compare bytes and branch.
+    case AArch64::CBBGTWrr: return AArch64::CBBGEWrr; // swap
+    case AArch64::CBBGEWrr: return AArch64::CBBGTWrr; // swap
+    case AArch64::CBBHIWrr: return AArch64::CBBHSWrr; // swap
+    case AArch64::CBBHSWrr: return AArch64::CBBHIWrr; // swap
     case AArch64::CBBEQWrr: return AArch64::CBBNEWrr;
     case AArch64::CBBNEWrr: return AArch64::CBBEQWrr;
-    case AArch64::CBGTWri:  return AArch64::CBLTWri;
-    case AArch64::CBGTXri:  return AArch64::CBLTXri;
-    case AArch64::CBLTWri:  return AArch64::CBGTWri;
-    case AArch64::CBLTXri:  return AArch64::CBGTXri;
-    case AArch64::CBHIWri:  return AArch64::CBLOWri;
-    case AArch64::CBHIXri:  return AArch64::CBLOXri;
-    case AArch64::CBLOWri:  return AArch64::CBHIWri;
-    case AArch64::CBLOXri:  return AArch64::CBHIXri;
-    case AArch64::CBEQWri:  return AArch64::CBNEWri;
-    case AArch64::CBEQXri:  return AArch64::CBNEXri;
-    case AArch64::CBNEWri:  return AArch64::CBEQWri;
-    case AArch64::CBNEXri:  return AArch64::CBEQXri;
+    // Compare halfwords and branch.
+    case AArch64::CBHGTWrr: return AArch64::CBHGEWrr; // swap
+    case AArch64::CBHGEWrr: return AArch64::CBHGTWrr; // swap
+    case AArch64::CBHHIWrr: return AArch64::CBHHSWrr; // swap
+    case AArch64::CBHHSWrr: return AArch64::CBHHIWrr; // swap
+    case AArch64::CBHEQWrr: return AArch64::CBHNEWrr;
+    case AArch64::CBHNEWrr: return AArch64::CBHEQWrr;
     }
     // clang-format on
   }
@@ -1789,6 +1797,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     switch (Opcode) {
     default:
       return false;
+    // Compare registers and branch.
     case AArch64::CBGTWrr:
     case AArch64::CBGTXrr:
     case AArch64::CBGEWrr:
@@ -1797,14 +1806,16 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     case AArch64::CBHIXrr:
     case AArch64::CBHSWrr:
     case AArch64::CBHSXrr:
-    case AArch64::CBHGTWrr:
-    case AArch64::CBHGEWrr:
-    case AArch64::CBHHIWrr:
-    case AArch64::CBHHSWrr:
+    // Compare bytes and branch.
     case AArch64::CBBGTWrr:
     case AArch64::CBBGEWrr:
     case AArch64::CBBHIWrr:
     case AArch64::CBBHSWrr:
+    // Compare halfwords and branch.
+    case AArch64::CBHGTWrr:
+    case AArch64::CBHGEWrr:
+    case AArch64::CBHHIWrr:
+    case AArch64::CBHHSWrr:
       return true;
     }
   }
@@ -1813,10 +1824,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     switch (Opcode) {
     default:
       return false;
-    case AArch64::CBLTWri:
-    case AArch64::CBLTXri:
-    case AArch64::CBLOWri:
-    case AArch64::CBLOXri:
+    case AArch64::CBGTWri:
+    case AArch64::CBGTXri:
+    case AArch64::CBHIWri:
+    case AArch64::CBHIXri:
       return true;
     }
   }
@@ -1825,10 +1836,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     switch (Opcode) {
     default:
       return false;
-    case AArch64::CBGTWri:
-    case AArch64::CBGTXri:
-    case AArch64::CBHIWri:
-    case AArch64::CBHIXri:
+    case AArch64::CBLTWri:
+    case AArch64::CBLTXri:
+    case AArch64::CBLOWri:
+    case AArch64::CBLOXri:
       return true;
     }
   }
@@ -1836,14 +1847,24 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
                               MCContext *Ctx) const override {
     if (isTB(Inst) || isCB(Inst) || isCompAndBranch(Inst)) {
-      if (needsRegSwap(Inst.getOpcode()))
+      unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
+      Inst.setOpcode(InvertedOpcode);
+      assert(Inst.getOpcode() != 0 && "Invalid branch instruction");
+      // The FEAT_CMPBR compare-and-branch instructions cannot encode all
+      // the possible condition codes, therefore we either have to adjust
+      // the immediate value by +-1, or to swap the register operands
+      // when reversing the branch condition.
+      if (needsRegSwap(InvertedOpcode))
         std::swap(Inst.getOperand(0), Inst.getOperand(1));
-      else if (needsImmDec(Inst.getOpcode()))
+      else if (needsImmDec(InvertedOpcode)) {
+        assert(Inst.getOperand(1).getImm() > 0 &&
+               "compare-and-branch immediate operand out-of-bounds");
         Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() - 1);
-      else if (needsImmInc(Inst.getOpcode()))
+      } else if (needsImmInc(InvertedOpcode)) {
+        assert(Inst.getOperand(1).getImm() < 63 &&
+               "compare-and-branch immediate operand out-of-bounds");
         Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() + 1);
-      Inst.setOpcode(getInvertedBranchOpcode(Inst.getOpcode()));
-      assert(Inst.getOpcode() != 0 && "Invalid branch instruction");
+      }
     } else if (Inst.getOpcode() == AArch64::Bcc) {
       Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode(
           static_cast<AArch64CC::CondCode>(Inst.getOperand(0).getImm())));

>From 742359665fcba62b22245819ed4e7effc620ac5b Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Fri, 16 Jan 2026 11:01:44 +0000
Subject: [PATCH 05/13] Handle irreversible branches.

---
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 22 +++++++++-----
 bolt/test/AArch64/compare-and-branch.S        | 30 +++++++++++++++++++
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index f20c0a4a62bf9..fe6b1065243e9 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1844,8 +1844,21 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
   }
 
+  bool isReversibleBranch(const MCInst &Inst) const override {
+    if (isCompAndBranch(Inst)) {
+      unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
+      if (needsImmDec(InvertedOpcode))
+        return Inst.getOperand(1).getImm() > 0;
+      if (needsImmInc(InvertedOpcode))
+        return Inst.getOperand(1).getImm() < 63;
+    }
+    return MCPlusBuilder::isReversibleBranch(Inst);
+  }
+
   void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
                               MCContext *Ctx) const override {
+    assert(isReversibleBranch(Inst) && "Cannot reverse branch");
+
     if (isTB(Inst) || isCB(Inst) || isCompAndBranch(Inst)) {
       unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
       Inst.setOpcode(InvertedOpcode);
@@ -1856,15 +1869,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       // when reversing the branch condition.
       if (needsRegSwap(InvertedOpcode))
         std::swap(Inst.getOperand(0), Inst.getOperand(1));
-      else if (needsImmDec(InvertedOpcode)) {
-        assert(Inst.getOperand(1).getImm() > 0 &&
-               "compare-and-branch immediate operand out-of-bounds");
+      else if (needsImmDec(InvertedOpcode))
         Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() - 1);
-      } else if (needsImmInc(InvertedOpcode)) {
-        assert(Inst.getOperand(1).getImm() < 63 &&
-               "compare-and-branch immediate operand out-of-bounds");
+      else if (needsImmInc(InvertedOpcode))
         Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() + 1);
-      }
     } else if (Inst.getOpcode() == AArch64::Bcc) {
       Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode(
           static_cast<AArch64CC::CondCode>(Inst.getOperand(0).getImm())));
diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
index 374541c9bd6a0..6cb69da487949 100644
--- a/bolt/test/AArch64/compare-and-branch.S
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -50,6 +50,16 @@ baz:
 .Lcold_baz:
     ret
 
+  .globl  irreversible
+  .type irreversible, %function
+irreversible:
+# FDATA: 1 irreversible #.entry_irreversible# 10
+.entry_irreversible:
+    cbgt x0, #63, .Lcold_irreversible
+    mov x0, #4
+.Lcold_irreversible:
+    ret
+
 ## Force relocation mode.
 .if RELOCATION_MODE
 .reloc 0, R_AARCH64_NONE
@@ -80,6 +90,12 @@ baz:
 # RELOC-MODE-NEXT:            {{.*}} b            0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
 # RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b            0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
 
+# RELOC-MODE: <irreversible>:
+# RELOC-MODE-NEXT:            {{.*}} cbgt x0, #0x3f, 0x[[ADDR9:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT:            {{.*}} b               0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
+# RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b               0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
+
+
 # RELOC-MODE: Disassembly of section .text.cold:
 
 # RELOC-MODE: <foo.cold.0>:
@@ -94,6 +110,10 @@ baz:
 # RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} mov x0, #0x3 // =3
 # RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} ret
 
+# RELOC-MODE: <irreversible.cold.0>:
+# RELOC-MODE-NEXT: [[ADDR10]]: {{.*}} mov x0, #0x4 // =4
+# RELOC-MODE-NEXT: [[ADDR11]]: {{.*}} ret
+
 
 # NON-RELOC-MODE: Disassembly of section .text:
 
@@ -112,6 +132,12 @@ baz:
 # NON-RELOC-MODE-NEXT:            {{.*}} b            0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
 # NON-RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b            0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
 
+# NON-RELOC-MODE: <irreversible>:
+# NON-RELOC-MODE-NEXT:            {{.*}} cbgt x0, #0x3f, 0x[[ADDR9:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT:            {{.*}} b               0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
+# NON-RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b               0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
+
+
 # NON-RELOC-MODE: Disassembly of section .bolt.text:
 
 # NON-RELOC-MODE: <foo.cold.0>:
@@ -125,3 +151,7 @@ baz:
 # NON-RELOC-MODE: <baz.cold.0>:
 # NON-RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} mov x0, #0x3 // =3
 # NON-RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} ret
+
+# NON-RELOC-MODE: <irreversible.cold.0>:
+# NON-RELOC-MODE-NEXT: [[ADDR10]]: {{.*}} mov x0, #0x4 // =4
+# NON-RELOC-MODE-NEXT: [[ADDR11]]: {{.*}} ret

>From 61e7719338e3dc39fd8774b1f68b29601903189f Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Fri, 16 Jan 2026 11:13:54 +0000
Subject: [PATCH 06/13] Do not early exit isReversibleBranch before calling
 parent.

---
 bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index fe6b1065243e9..207313912fbae 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1847,10 +1847,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   bool isReversibleBranch(const MCInst &Inst) const override {
     if (isCompAndBranch(Inst)) {
       unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
-      if (needsImmDec(InvertedOpcode))
-        return Inst.getOperand(1).getImm() > 0;
-      if (needsImmInc(InvertedOpcode))
-        return Inst.getOperand(1).getImm() < 63;
+      if (needsImmDec(InvertedOpcode) && Inst.getOperand(1).getImm() <= 0)
+        return false;
+      if (needsImmInc(InvertedOpcode) && Inst.getOperand(1).getImm() >= 63)
+        return false;
     }
     return MCPlusBuilder::isReversibleBranch(Inst);
   }

>From 5d77569de15ddfe3ca6b77b65c2b9a54bfc9f7d0 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Thu, 29 Jan 2026 14:08:36 +0000
Subject: [PATCH 07/13] replace assertion with bolt-error inside
 reverseBranchCondition

---
 bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index db07775cf8ecf..8a414e8ceba18 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1921,7 +1921,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 
   void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
                               MCContext *Ctx) const override {
-    assert(isReversibleBranch(Inst) && "Cannot reverse branch");
+    if (!isReversibleBranch(Inst)) {
+      errs() << "BOLT-ERROR: Cannot reverse branch " << Inst << "\n";
+      exit(1);
+    }
 
     if (isTB(Inst) || isCB(Inst) || isCompAndBranch(Inst)) {
       unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());

>From ab14a60c4fc8589f379c4a5271675070b3149b11 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Thu, 29 Jan 2026 16:08:06 +0000
Subject: [PATCH 08/13] Add large function to make sure long distance jump to
 cold section works after splitting.

---
 bolt/test/AArch64/compare-and-branch.S | 27 +++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
index 6cb69da487949..0a52d8b292a17 100644
--- a/bolt/test/AArch64/compare-and-branch.S
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -7,13 +7,13 @@
 
 # RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=1
 # RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --keep-nops --compact-code-model
 # RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-RELOCS
 # RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELOC-MODE
 
 # RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=0
 # RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --keep-nops
 # RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-NO-RELOCS
 # RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NON-RELOC-MODE
 
@@ -25,6 +25,7 @@ foo:
 ## Test immediate increment when inverting the branch.
     cbgt x0, #0, .Lcold_foo
     mov x0, #1
+    bl large_function
 .Lcold_foo:
     ret
 
@@ -60,6 +61,16 @@ irreversible:
 .Lcold_irreversible:
     ret
 
+  .globl  large_function
+  .type large_function, %function
+large_function:
+# FDATA: 1 large_function #.entry_large_function# 10
+.entry_large_function:
+  .rept 300000
+    nop
+  .endr
+  ret
+
 ## Force relocation mode.
 .if RELOCATION_MODE
 .reloc 0, R_AARCH64_NONE
@@ -67,9 +78,11 @@ irreversible:
 
 
 # CHECK-RELOCS: R_AARCH64_NONE *ABS*
+# CHECK-RELOCS: R_AARCH64_CALL26 large_function
 # CHECK-RELOCS-NOT: R_AARCH64_
 
 
+# CHECK-NO-RELOCS: R_AARCH64_CALL26 large_function
 # CHECK-NO-RELOCS-NOT: R_AARCH64_
 
 
@@ -95,11 +108,15 @@ irreversible:
 # RELOC-MODE-NEXT:            {{.*}} b               0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
 # RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b               0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
 
+# RELOC-MODE: <large_function>:
+# RELOC-MODE-NEXT: [[ADDR12:[0-9a-f]+]]: {{.*}} nop
+# RELOC-MODE:                            {{.*}} ret
 
 # RELOC-MODE: Disassembly of section .text.cold:
 
 # RELOC-MODE: <foo.cold.0>:
 # RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
+# RELOC-MODE-NEXT:            {{.*}} bl  0x[[ADDR12]] <large_function>
 # RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} ret
 
 # RELOC-MODE: <bar.cold.0>:
@@ -137,11 +154,15 @@ irreversible:
 # NON-RELOC-MODE-NEXT:            {{.*}} b               0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
 # NON-RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b               0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
 
+# NON-RELOC-MODE: <large_function>:
+# NON-RELOC-MODE-NEXT: [[ADDR12:[0-9a-f]+]]: {{.*}} nop
+# NON-RELOC-MODE:                            {{.*}} ret
 
-# NON-RELOC-MODE: Disassembly of section .bolt.text:
+# NON-RELOC-MODE: Disassembly of section .text.cold:
 
 # NON-RELOC-MODE: <foo.cold.0>:
 # NON-RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
+# NON-RELOC-MODE-NEXT:            {{.*}} bl  0x[[ADDR12]] <large_function>
 # NON-RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} ret
 
 # NON-RELOC-MODE: <bar.cold.0>:

>From 0e24094b997a34a0472f25979fb9d9079a848b25 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Tue, 3 Feb 2026 10:49:00 +0000
Subject: [PATCH 09/13] Revert "Add large function to make sure long distance
 jump to cold section works after splitting."

This reverts commit ab14a60c4fc8589f379c4a5271675070b3149b11.
---
 bolt/test/AArch64/compare-and-branch.S | 27 +++-----------------------
 1 file changed, 3 insertions(+), 24 deletions(-)

diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
index 0a52d8b292a17..6cb69da487949 100644
--- a/bolt/test/AArch64/compare-and-branch.S
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -7,13 +7,13 @@
 
 # RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=1
 # RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --keep-nops --compact-code-model
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model
 # RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-RELOCS
 # RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELOC-MODE
 
 # RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=0
 # RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --keep-nops
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
 # RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-NO-RELOCS
 # RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NON-RELOC-MODE
 
@@ -25,7 +25,6 @@ foo:
 ## Test immediate increment when inverting the branch.
     cbgt x0, #0, .Lcold_foo
     mov x0, #1
-    bl large_function
 .Lcold_foo:
     ret
 
@@ -61,16 +60,6 @@ irreversible:
 .Lcold_irreversible:
     ret
 
-  .globl  large_function
-  .type large_function, %function
-large_function:
-# FDATA: 1 large_function #.entry_large_function# 10
-.entry_large_function:
-  .rept 300000
-    nop
-  .endr
-  ret
-
 ## Force relocation mode.
 .if RELOCATION_MODE
 .reloc 0, R_AARCH64_NONE
@@ -78,11 +67,9 @@ large_function:
 
 
 # CHECK-RELOCS: R_AARCH64_NONE *ABS*
-# CHECK-RELOCS: R_AARCH64_CALL26 large_function
 # CHECK-RELOCS-NOT: R_AARCH64_
 
 
-# CHECK-NO-RELOCS: R_AARCH64_CALL26 large_function
 # CHECK-NO-RELOCS-NOT: R_AARCH64_
 
 
@@ -108,15 +95,11 @@ large_function:
 # RELOC-MODE-NEXT:            {{.*}} b               0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
 # RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b               0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
 
-# RELOC-MODE: <large_function>:
-# RELOC-MODE-NEXT: [[ADDR12:[0-9a-f]+]]: {{.*}} nop
-# RELOC-MODE:                            {{.*}} ret
 
 # RELOC-MODE: Disassembly of section .text.cold:
 
 # RELOC-MODE: <foo.cold.0>:
 # RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
-# RELOC-MODE-NEXT:            {{.*}} bl  0x[[ADDR12]] <large_function>
 # RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} ret
 
 # RELOC-MODE: <bar.cold.0>:
@@ -154,15 +137,11 @@ large_function:
 # NON-RELOC-MODE-NEXT:            {{.*}} b               0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
 # NON-RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b               0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
 
-# NON-RELOC-MODE: <large_function>:
-# NON-RELOC-MODE-NEXT: [[ADDR12:[0-9a-f]+]]: {{.*}} nop
-# NON-RELOC-MODE:                            {{.*}} ret
 
-# NON-RELOC-MODE: Disassembly of section .text.cold:
+# NON-RELOC-MODE: Disassembly of section .bolt.text:
 
 # NON-RELOC-MODE: <foo.cold.0>:
 # NON-RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
-# NON-RELOC-MODE-NEXT:            {{.*}} bl  0x[[ADDR12]] <large_function>
 # NON-RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} ret
 
 # NON-RELOC-MODE: <bar.cold.0>:

>From 8f9188f0e6899bd5e7b58e66c6f070242a6caeb8 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Tue, 3 Feb 2026 10:54:30 +0000
Subject: [PATCH 10/13] rename test functions

---
 bolt/test/AArch64/compare-and-branch.S | 68 +++++++++++++-------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
index 6cb69da487949..e05f607879331 100644
--- a/bolt/test/AArch64/compare-and-branch.S
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -17,40 +17,40 @@
 # RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-NO-RELOCS
 # RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NON-RELOC-MODE
 
-  .globl  foo
-  .type foo, %function
-foo:
-# FDATA: 1 foo #.entry_foo# 10
-.entry_foo:
+  .globl immediate_increment
+  .type immediate_increment, %function
+immediate_increment:
+# FDATA: 1 immediate_increment #.entry_immediate_increment# 10
+.entry_immediate_increment:
 ## Test immediate increment when inverting the branch.
-    cbgt x0, #0, .Lcold_foo
+    cbgt x0, #0, .Lcold_immediate_increment
     mov x0, #1
-.Lcold_foo:
+.Lcold_immediate_increment:
     ret
 
-  .globl  bar
-  .type bar, %function
-bar:
-# FDATA: 1 bar #.entry_bar# 10
-.entry_bar:
+  .globl immediate_decrement
+  .type immediate_decrement, %function
+immediate_decrement:
+# FDATA: 1 immediate_decrement #.entry_immediate_decrement# 10
+.entry_immediate_decrement:
 ## Test immediate decrement when inverting the branch.
-    cblo x0, #1, .Lcold_bar
+    cblo x0, #1, .Lcold_immediate_decrement
     mov x0, #2
-.Lcold_bar:
+.Lcold_immediate_decrement:
     ret
 
-  .globl  baz
-  .type baz, %function
-baz:
-# FDATA: 1 baz #.entry_baz# 10
-.entry_baz:
+  .globl register_swap
+  .type register_swap, %function
+register_swap:
+# FDATA: 1 register_swap #.entry_register_swap# 10
+.entry_register_swap:
 ## Test register swap when inverting the branch.
-    cbge x0, x1, .Lcold_baz
+    cbge x0, x1, .Lcold_register_swap
     mov x0, #3
-.Lcold_baz:
+.Lcold_register_swap:
     ret
 
-  .globl  irreversible
+  .globl irreversible
   .type irreversible, %function
 irreversible:
 # FDATA: 1 irreversible #.entry_irreversible# 10
@@ -75,17 +75,17 @@ irreversible:
 
 # RELOC-MODE: Disassembly of section .text:
 
-# RELOC-MODE: <foo>:
+# RELOC-MODE: <immediate_increment>:
 # RELOC-MODE-NEXT:            {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
 # RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
 # RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b              0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
 
-# RELOC-MODE: <bar>:
+# RELOC-MODE: <immediate_decrement>:
 # RELOC-MODE-NEXT:            {{.*}} cblo x0, #0x1, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
 # RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
 # RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b              0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
 
-# RELOC-MODE: <baz>:
+# RELOC-MODE: <register_swap>:
 # RELOC-MODE-NEXT:            {{.*}} cbge x0, x1, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
 # RELOC-MODE-NEXT:            {{.*}} b            0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
 # RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b            0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
@@ -98,15 +98,15 @@ irreversible:
 
 # RELOC-MODE: Disassembly of section .text.cold:
 
-# RELOC-MODE: <foo.cold.0>:
+# RELOC-MODE: <immediate_increment.cold.0>:
 # RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
 # RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} ret
 
-# RELOC-MODE: <bar.cold.0>:
+# RELOC-MODE: <immediate_decrement.cold.0>:
 # RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x2 // =2
 # RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} ret
 
-# RELOC-MODE: <baz.cold.0>:
+# RELOC-MODE: <register_swap.cold.0>:
 # RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} mov x0, #0x3 // =3
 # RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} ret
 
@@ -117,17 +117,17 @@ irreversible:
 
 # NON-RELOC-MODE: Disassembly of section .text:
 
-# NON-RELOC-MODE: <foo>:
+# NON-RELOC-MODE: <immediate_increment>:
 # NON-RELOC-MODE-NEXT:            {{.*}} cblt x0, #0x1, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
 # NON-RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
 # NON-RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b              0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
 
-# NON-RELOC-MODE: <bar>:
+# NON-RELOC-MODE: <immediate_decrement>:
 # NON-RELOC-MODE-NEXT:            {{.*}} cbhi x0, #0x0, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
 # NON-RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
 # NON-RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b              0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
 
-# NON-RELOC-MODE: <baz>:
+# NON-RELOC-MODE: <register_swap>:
 # NON-RELOC-MODE-NEXT:            {{.*}} cbgt x1, x0, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
 # NON-RELOC-MODE-NEXT:            {{.*}} b            0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
 # NON-RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b            0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
@@ -140,15 +140,15 @@ irreversible:
 
 # NON-RELOC-MODE: Disassembly of section .bolt.text:
 
-# NON-RELOC-MODE: <foo.cold.0>:
+# NON-RELOC-MODE: <immediate_increment.cold.0>:
 # NON-RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
 # NON-RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} ret
 
-# NON-RELOC-MODE: <bar.cold.0>:
+# NON-RELOC-MODE: <immediate_decrement.cold.0>:
 # NON-RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} mov x0, #0x2 // =2
 # NON-RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} ret
 
-# NON-RELOC-MODE: <baz.cold.0>:
+# NON-RELOC-MODE: <register_swap.cold.0>:
 # NON-RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} mov x0, #0x3 // =3
 # NON-RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} ret
 

>From e70c30b021a66483efe61ffa7e0dbd16c33e8b60 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Tue, 3 Feb 2026 11:50:39 +0000
Subject: [PATCH 11/13] Add a negative test with cold target going out of range
 in compact code model

---
 .../AArch64/compare-and-branch-unsupported.S  | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 bolt/test/AArch64/compare-and-branch-unsupported.S

diff --git a/bolt/test/AArch64/compare-and-branch-unsupported.S b/bolt/test/AArch64/compare-and-branch-unsupported.S
new file mode 100644
index 0000000000000..668a45d2dd5b0
--- /dev/null
+++ b/bolt/test/AArch64/compare-and-branch-unsupported.S
@@ -0,0 +1,54 @@
+# This test checks that splitting functions which contain short range
+# conditional branches fails in compact code model due to the cold
+# target being far away, beyond the 128MB limit.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=COMPACT-CODE-FAILS
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=CHECK-ADRP
+
+  .globl foo
+  .type foo, %function
+foo:
+# FDATA: 1 foo #.entry_foo# 10
+.entry_foo:
+    cbgt x0, #0, .Lcold_foo
+    mov x0, #1
+.Lcold_foo:
+    ret
+
+## Add enough space for the cold section to be far away,
+## beyond the 128MB limit of compact code model.
+.space 0x8000000
+
+## Force relocation mode.
+.reloc 0, R_AARCH64_NONE
+
+
+# COMPACT-CODE-FAILS: BOLT-ERROR: JITLink failed: In graph in-memory object file, section .text: relocation target {{.*}} is out of range of Branch26PCRel fixup at address {{.*}}
+
+
+# CHECK-ADRP: Disassembly of section .text:
+
+# CHECK-ADRP: <foo>:
+# CHECK-ADRP-NEXT:            {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
+# CHECK-ADRP-NEXT:            {{.*}} adrp x16,      0x[[ADDR1:[0-9a-f]+]]00 <foo+0x8000000>
+# CHECK-ADRP-NEXT:            {{.*}} add  x16, x16, #0x40
+# CHECK-ADRP-NEXT:            {{.*}} br   x16
+# CHECK-ADRP-NEXT: [[ADDR0]]: {{.*}} adrp x16,      0x[[ADDR1]]00 <foo+0x8000000>
+# CHECK-ADRP-NEXT:            {{.*}} add  x16, x16, #0x44
+# CHECK-ADRP-NEXT:            {{.*}} br   x16
+
+
+# CHECK-ADRP: Disassembly of section .text.cold:
+
+# CHECK-ADRP: <foo.cold.0>:
+# CHECK-ADRP-NEXT: [[ADDR1]]40: {{.*}} mov x0, #0x1 // =1
+# CHECK-ADRP-NEXT: [[ADDR1]]44: {{.*}} ret

>From 325fa5561a173fc3966458ceb9f4a1d52d502ccf Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Wed, 4 Feb 2026 14:48:49 +0000
Subject: [PATCH 12/13] Changes in this revision * Only support compact code
 model, diagnose the contrary * and negative tests for the above * add tests
 for function splitting where target:   < 1KB   > 1KB   > 128MB * add tests
 for block reordering to exercise branch inversion * add test for block
 reordering causing out of bounds fixup value. * minor adjustment in function
 isReversibleBranch

---
 bolt/include/bolt/Core/MCPlusBuilder.h        |   6 +
 bolt/lib/Passes/LongJmp.cpp                   |   6 +
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |   8 +-
 .../AArch64/compare-and-branch-inversion.S    | 105 +++++++++++
 .../compare-and-branch-reorder-blocks.S       |  49 +++++
 .../AArch64/compare-and-branch-unsupported.S  |  58 +++---
 bolt/test/AArch64/compare-and-branch.S        | 173 ++++--------------
 7 files changed, 237 insertions(+), 168 deletions(-)
 create mode 100644 bolt/test/AArch64/compare-and-branch-inversion.S
 create mode 100644 bolt/test/AArch64/compare-and-branch-reorder-blocks.S

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index e571e91d85135..cac5fe520ca16 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1747,6 +1747,12 @@ class MCPlusBuilder {
     return false;
   }
 
+  /// AArch64 uses this to perform diagnostics in the LongJmp pass.
+  virtual bool isShortRangeBranch(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
   /// Receives a list of MCInst of the basic block to analyze and interpret the
   /// terminators of this basic block. TBB must be initialized with the original
   /// fall-through for this BB.
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 798e1ba08918a..f257db66347f7 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -69,6 +69,12 @@ static BinaryBasicBlock *getBBAtHotColdSplitPoint(BinaryFunction &Func) {
 }
 
 static bool mayNeedStub(const BinaryContext &BC, const MCInst &Inst) {
+  if (BC.isAArch64() && BC.MIB->isShortRangeBranch(Inst) &&
+      !opts::CompactCodeModel) {
+    BC.errs() << "Short range branch " << Inst
+              << " not supported outside compact code model\n";
+    exit(1);
+  }
   return (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) &&
          !BC.MIB->isIndirectBranch(Inst) && !BC.MIB->isIndirectCall(Inst);
 }
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 8a414e8ceba18..24e97063e96a2 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1911,9 +1911,9 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   bool isReversibleBranch(const MCInst &Inst) const override {
     if (isCompAndBranch(Inst)) {
       unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
-      if (needsImmDec(InvertedOpcode) && Inst.getOperand(1).getImm() <= 0)
+      if (needsImmDec(InvertedOpcode) && Inst.getOperand(1).getImm() == 0)
         return false;
-      if (needsImmInc(InvertedOpcode) && Inst.getOperand(1).getImm() >= 63)
+      if (needsImmInc(InvertedOpcode) && Inst.getOperand(1).getImm() == 63)
         return false;
     }
     return MCPlusBuilder::isReversibleBranch(Inst);
@@ -2345,6 +2345,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       convertJmpToTailCall(Inst);
   }
 
+  bool isShortRangeBranch(const MCInst &Inst) const override {
+    return isCompAndBranch(Inst);
+  }
+
   bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
                      const MCSymbol *&TBB, const MCSymbol *&FBB,
                      MCInst *&CondBranch,
diff --git a/bolt/test/AArch64/compare-and-branch-inversion.S b/bolt/test/AArch64/compare-and-branch-inversion.S
new file mode 100644
index 0000000000000..338d367b3cbfe
--- /dev/null
+++ b/bolt/test/AArch64/compare-and-branch-inversion.S
@@ -0,0 +1,105 @@
+# This test checks that branch inversion works when reordering blocks which
+# contain short range conditional branches. Handles edge cases, like when
+# the immediate value is the upper or lower allowed value in which case the
+# transformation bails.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s
+
+  .globl immediate_increment
+  .type immediate_increment, %function
+immediate_increment:
+.entry0:
+# FDATA: 1 immediate_increment #.entry0# 10
+    cbgt x0, #0, .exit0
+.cold0:
+# FDATA: 1 immediate_increment #.cold0# 1
+    mov x0, #1
+    ret
+.exit0:
+# FDATA: 1 immediate_increment #.exit0# 10
+    mov x0, #2
+    ret
+
+  .globl immediate_decrement
+  .type immediate_decrement, %function
+immediate_decrement:
+.entry1:
+# FDATA: 1 immediate_decrement #.entry1# 10
+    cblo x0, #1, .exit1
+.cold1:
+# FDATA: 1 immediate_decrement #.cold1# 1
+    mov x0, #1
+    ret
+.exit1:
+# FDATA: 1 immediate_decrement #.exit1# 10
+    mov x0, #2
+    ret
+
+  .globl register_swap
+  .type register_swap, %function
+register_swap:
+.entry2:
+# FDATA: 1 register_swap #.entry2# 10
+    cbge x0, x1, .exit2
+.cold2:
+# FDATA: 1 register_swap #.cold2# 1
+    mov x0, #1
+    ret
+.exit2:
+# FDATA: 1 register_swap #.exit2# 10
+    mov x0, #2
+    ret
+
+  .globl irreversible
+  .type irreversible, %function
+irreversible:
+.entry3:
+# FDATA: 1 irreversible #.entry3# 10
+    cbgt x0, #63, .exit3
+.cold3:
+# FDATA: 1 irreversible #.cold3# 1
+    mov x0, #1
+    ret
+.exit3:
+# FDATA: 1 irreversible #.exit3# 10
+    mov x0, #2
+    ret
+
+## Force relocation mode.
+.reloc 0, R_AARCH64_NONE
+
+# CHECK: Disassembly of section .text:
+
+# CHECK: <immediate_increment>:
+# CHECK-NEXT:            {{.*}} cblt x0, #0x1, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT:            {{.*}} mov  x0, #0x2 // =2
+# CHECK-NEXT:            {{.*}} ret
+# CHECK-NEXT: [[ADDR0]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT:            {{.*}} ret
+
+# CHECK: <immediate_decrement>:
+# CHECK-NEXT:            {{.*}} cbhi x0, #0x0, 0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT:            {{.*}} mov  x0, #0x2 // =2
+# CHECK-NEXT:            {{.*}} ret
+# CHECK-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT:            {{.*}} ret
+
+# CHECK: <register_swap>:
+# CHECK-NEXT:            {{.*}} cbgt x1, x0, 0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT:            {{.*}} mov  x0, #0x2 // =2
+# CHECK-NEXT:            {{.*}} ret
+# CHECK-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT:            {{.*}} ret
+
+# CHECK: <irreversible>:
+# CHECK-NEXT:            {{.*}} cbgt x0, #0x3f, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT:            {{.*}} b               0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT: [[ADDR3]]: {{.*}} mov  x0, #0x2 // =2
+# CHECK-NEXT:            {{.*}} ret
+# CHECK-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT:            {{.*}} ret
diff --git a/bolt/test/AArch64/compare-and-branch-reorder-blocks.S b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
new file mode 100644
index 0000000000000..8bd3699b97b9b
--- /dev/null
+++ b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
@@ -0,0 +1,49 @@
+# This test checks that reordering blocks which contain short range conditional
+# branches succeeds if the target stays in range and fails if it does not.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=0
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=300
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model --keep-nops 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=FIXUP_OUT_OF_RANGE
+
+  .globl reorder_blocks
+  .type reorder_blocks, %function
+reorder_blocks:
+.entry:
+# FDATA: 1 reorder_blocks #.entry# 10
+    cbgt x0, #0, .cold_exit
+.skip:
+# FDATA: 1 reorder_blocks #.skip# 10
+    b .hot_exit
+.cold_exit:
+# FDATA: 1 reorder_blocks #.cold_exit# 1
+    mov x0, #1
+    ret
+.hot_exit:
+# FDATA: 1 reorder_blocks #.hot_exit# 10
+  .rept NUM_NOPS
+    nop
+  .endr
+  mov x0, #2
+  ret
+
+## Force relocation mode.
+.reloc 0, R_AARCH64_NONE
+
+# CHECK: Disassembly of section .text:
+
+# CHECK: <reorder_blocks>:
+# CHECK-NEXT:           {{.*}} cbgt x0, #0x0, 0x[[ADDR:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT:           {{.*}} mov  x0, #0x2 // =2
+# CHECK-NEXT:           {{.*}} ret
+# CHECK-NEXT: [[ADDR]]: {{.*}} mov  x0, #0x1 // =1
+# CHECK-NEXT:           {{.*}} ret
+
+# FIXUP_OUT_OF_RANGE: error: fixup value out of range
diff --git a/bolt/test/AArch64/compare-and-branch-unsupported.S b/bolt/test/AArch64/compare-and-branch-unsupported.S
index 668a45d2dd5b0..0bab46b43fe24 100644
--- a/bolt/test/AArch64/compare-and-branch-unsupported.S
+++ b/bolt/test/AArch64/compare-and-branch-unsupported.S
@@ -1,54 +1,48 @@
 # This test checks that splitting functions which contain short range
-# conditional branches fails in compact code model due to the cold
-# target being far away, beyond the 128MB limit.
+# conditional branches does not work outside compact code model.
 
 # REQUIRES: system-linux, asserts
 
-# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=0 -DRESERVE_SPACE=0
 # RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=COMPACT-CODE-FAILS
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions 2>&1 \
+# RUN:   | FileCheck %s
 
-# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=256 -DRESERVE_SPACE=0
 # RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
-# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=CHECK-ADRP
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --keep-nops 2>&1 \
+# RUN:   | FileCheck %s
+
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=0 -DRESERVE_SPACE=1
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions 2>&1 \
+# RUN:   | FileCheck %s
 
   .globl foo
   .type foo, %function
 foo:
-# FDATA: 1 foo #.entry_foo# 10
 .entry_foo:
+# FDATA: 1 foo #.entry_foo# 10
     cbgt x0, #0, .Lcold_foo
     mov x0, #1
 .Lcold_foo:
     ret
 
-## Add enough space for the cold section to be far away,
-## beyond the 128MB limit of compact code model.
+  .globl large_function
+  .type large_function, %function
+large_function:
+.entry_large_function:
+# FDATA: 1 large_function #.entry_large_function# 10
+  .rept NUM_NOPS
+    nop
+  .endr
+    ret
+
+.if RESERVE_SPACE
 .space 0x8000000
+.endif
 
 ## Force relocation mode.
 .reloc 0, R_AARCH64_NONE
 
-
-# COMPACT-CODE-FAILS: BOLT-ERROR: JITLink failed: In graph in-memory object file, section .text: relocation target {{.*}} is out of range of Branch26PCRel fixup at address {{.*}}
-
-
-# CHECK-ADRP: Disassembly of section .text:
-
-# CHECK-ADRP: <foo>:
-# CHECK-ADRP-NEXT:            {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
-# CHECK-ADRP-NEXT:            {{.*}} adrp x16,      0x[[ADDR1:[0-9a-f]+]]00 <foo+0x8000000>
-# CHECK-ADRP-NEXT:            {{.*}} add  x16, x16, #0x40
-# CHECK-ADRP-NEXT:            {{.*}} br   x16
-# CHECK-ADRP-NEXT: [[ADDR0]]: {{.*}} adrp x16,      0x[[ADDR1]]00 <foo+0x8000000>
-# CHECK-ADRP-NEXT:            {{.*}} add  x16, x16, #0x44
-# CHECK-ADRP-NEXT:            {{.*}} br   x16
-
-
-# CHECK-ADRP: Disassembly of section .text.cold:
-
-# CHECK-ADRP: <foo.cold.0>:
-# CHECK-ADRP-NEXT: [[ADDR1]]40: {{.*}} mov x0, #0x1 // =1
-# CHECK-ADRP-NEXT: [[ADDR1]]44: {{.*}} ret
+# CHECK: Short range branch {{.*}} not supported outside compact code model
diff --git a/bolt/test/AArch64/compare-and-branch.S b/bolt/test/AArch64/compare-and-branch.S
index e05f607879331..b9309a1ec69a1 100644
--- a/bolt/test/AArch64/compare-and-branch.S
+++ b/bolt/test/AArch64/compare-and-branch.S
@@ -1,157 +1,62 @@
 # This test checks that splitting functions which contain short range
 # conditional branches works in compact code model without relying on
-# relocations. Also checks that splitting works in non-relocation mode,
-# in order to test the branch inversion on those instructions.
+# relocations.
 
 # REQUIRES: system-linux, asserts
 
-# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=1
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=0 -DRESERVE_SPACE=0
 # RUN: link_fdata --no-lbr %s %t %t.fdata
 # RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model
-# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-RELOCS
-# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELOC-MODE
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s
 
-# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DRELOCATION_MODE=0
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=256 -DRESERVE_SPACE=0
 # RUN: link_fdata --no-lbr %s %t %t.fdata
-# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions
-# RUN: llvm-objdump -r %t | FileCheck %s --check-prefix=CHECK-NO-RELOCS
-# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NON-RELOC-MODE
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model --keep-nops
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s
 
-  .globl immediate_increment
-  .type immediate_increment, %function
-immediate_increment:
-# FDATA: 1 immediate_increment #.entry_immediate_increment# 10
-.entry_immediate_increment:
-## Test immediate increment when inverting the branch.
-    cbgt x0, #0, .Lcold_immediate_increment
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=0 -DRESERVE_SPACE=1
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions --compact-code-model 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=BEYOND-128MB
+
+  .globl foo
+  .type foo, %function
+foo:
+.entry_foo:
+# FDATA: 1 foo #.entry_foo# 10
+    cbgt x0, #0, .Lcold_foo
     mov x0, #1
-.Lcold_immediate_increment:
-    ret
-
-  .globl immediate_decrement
-  .type immediate_decrement, %function
-immediate_decrement:
-# FDATA: 1 immediate_decrement #.entry_immediate_decrement# 10
-.entry_immediate_decrement:
-## Test immediate decrement when inverting the branch.
-    cblo x0, #1, .Lcold_immediate_decrement
-    mov x0, #2
-.Lcold_immediate_decrement:
+.Lcold_foo:
     ret
 
-  .globl register_swap
-  .type register_swap, %function
-register_swap:
-# FDATA: 1 register_swap #.entry_register_swap# 10
-.entry_register_swap:
-## Test register swap when inverting the branch.
-    cbge x0, x1, .Lcold_register_swap
-    mov x0, #3
-.Lcold_register_swap:
+  .globl large_function
+  .type large_function, %function
+large_function:
+.entry_large_function:
+# FDATA: 1 large_function #.entry_large_function# 10
+  .rept NUM_NOPS
+    nop
+  .endr
     ret
 
-  .globl irreversible
-  .type irreversible, %function
-irreversible:
-# FDATA: 1 irreversible #.entry_irreversible# 10
-.entry_irreversible:
-    cbgt x0, #63, .Lcold_irreversible
-    mov x0, #4
-.Lcold_irreversible:
-    ret
+.if RESERVE_SPACE
+.space 0x8000000
+.endif
 
 ## Force relocation mode.
-.if RELOCATION_MODE
 .reloc 0, R_AARCH64_NONE
-.endif
-
-
-# CHECK-RELOCS: R_AARCH64_NONE *ABS*
-# CHECK-RELOCS-NOT: R_AARCH64_
-
-
-# CHECK-NO-RELOCS-NOT: R_AARCH64_
-
-
-# RELOC-MODE: Disassembly of section .text:
-
-# RELOC-MODE: <immediate_increment>:
-# RELOC-MODE-NEXT:            {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b              0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
-
-# RELOC-MODE: <immediate_decrement>:
-# RELOC-MODE-NEXT:            {{.*}} cblo x0, #0x1, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b              0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
-
-# RELOC-MODE: <register_swap>:
-# RELOC-MODE-NEXT:            {{.*}} cbge x0, x1, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT:            {{.*}} b            0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b            0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
-
-# RELOC-MODE: <irreversible>:
-# RELOC-MODE-NEXT:            {{.*}} cbgt x0, #0x3f, 0x[[ADDR9:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT:            {{.*}} b               0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
-# RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b               0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
-
-
-# RELOC-MODE: Disassembly of section .text.cold:
-
-# RELOC-MODE: <immediate_increment.cold.0>:
-# RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
-# RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} ret
-
-# RELOC-MODE: <immediate_decrement.cold.0>:
-# RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x2 // =2
-# RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} ret
-
-# RELOC-MODE: <register_swap.cold.0>:
-# RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} mov x0, #0x3 // =3
-# RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} ret
-
-# RELOC-MODE: <irreversible.cold.0>:
-# RELOC-MODE-NEXT: [[ADDR10]]: {{.*}} mov x0, #0x4 // =4
-# RELOC-MODE-NEXT: [[ADDR11]]: {{.*}} ret
-
-
-# NON-RELOC-MODE: Disassembly of section .text:
-
-# NON-RELOC-MODE: <immediate_increment>:
-# NON-RELOC-MODE-NEXT:            {{.*}} cblt x0, #0x1, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: [[ADDR0]]: {{.*}} b              0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
-
-# NON-RELOC-MODE: <immediate_decrement>:
-# NON-RELOC-MODE-NEXT:            {{.*}} cbhi x0, #0x0, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT:            {{.*}} b              0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: [[ADDR3]]: {{.*}} b              0x[[ADDR5:[0-9a-f]+]] <{{.*}}>
-
-# NON-RELOC-MODE: <register_swap>:
-# NON-RELOC-MODE-NEXT:            {{.*}} cbgt x1, x0, 0x[[ADDR6:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT:            {{.*}} b            0x[[ADDR7:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: [[ADDR6]]: {{.*}} b            0x[[ADDR8:[0-9a-f]+]] <{{.*}}>
-
-# NON-RELOC-MODE: <irreversible>:
-# NON-RELOC-MODE-NEXT:            {{.*}} cbgt x0, #0x3f, 0x[[ADDR9:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT:            {{.*}} b               0x[[ADDR10:[0-9a-f]+]] <{{.*}}>
-# NON-RELOC-MODE-NEXT: [[ADDR9]]: {{.*}} b               0x[[ADDR11:[0-9a-f]+]] <{{.*}}>
-
 
-# NON-RELOC-MODE: Disassembly of section .bolt.text:
+# CHECK: Disassembly of section .text:
 
-# NON-RELOC-MODE: <immediate_increment.cold.0>:
-# NON-RELOC-MODE-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
-# NON-RELOC-MODE-NEXT: [[ADDR1]]: {{.*}} ret
+# CHECK: <foo>:
+# CHECK-NEXT:            {{.*}} cbgt x0, #0x0, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT:            {{.*}} b              0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT: [[ADDR0]]: {{.*}} b              0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
 
-# NON-RELOC-MODE: <immediate_decrement.cold.0>:
-# NON-RELOC-MODE-NEXT: [[ADDR5]]: {{.*}} mov x0, #0x2 // =2
-# NON-RELOC-MODE-NEXT: [[ADDR4]]: {{.*}} ret
+# CHECK: Disassembly of section .text.cold:
 
-# NON-RELOC-MODE: <register_swap.cold.0>:
-# NON-RELOC-MODE-NEXT: [[ADDR8]]: {{.*}} mov x0, #0x3 // =3
-# NON-RELOC-MODE-NEXT: [[ADDR7]]: {{.*}} ret
+# CHECK: <foo.cold.0>:
+# CHECK-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT: [[ADDR2]]: {{.*}} ret
 
-# NON-RELOC-MODE: <irreversible.cold.0>:
-# NON-RELOC-MODE-NEXT: [[ADDR10]]: {{.*}} mov x0, #0x4 // =4
-# NON-RELOC-MODE-NEXT: [[ADDR11]]: {{.*}} ret
+# BEYOND-128MB: BOLT-ERROR: JITLink failed: In graph in-memory object file, section .text: relocation target {{0x[0-9a-f]+}} (<anonymous symbol>) is out of range of Branch26PCRel fixup at address {{0x[0-9a-f]+}}

>From 1a5373a88f30acc2246d8e97c1a7e510449a5ee1 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Wed, 4 Feb 2026 15:14:39 +0000
Subject: [PATCH 13/13] Change NUM_NOPS from 300 to 256 in test script

---
 bolt/test/AArch64/compare-and-branch-reorder-blocks.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/test/AArch64/compare-and-branch-reorder-blocks.S b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
index 8bd3699b97b9b..10ec75d18dd96 100644
--- a/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
+++ b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
@@ -8,7 +8,7 @@
 # RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model
 # RUN: llvm-objdump -d %t.bolt | FileCheck %s
 
-# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=300
+# RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=256
 # RUN: link_fdata --no-lbr %s %t %t.fdata
 # RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model --keep-nops 2>&1 \
 # RUN:   | FileCheck %s --check-prefix=FIXUP_OUT_OF_RANGE



More information about the llvm-commits mailing list