[llvm] [AArch64] Optimize CBZ wzr and friends. (PR #161508)

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 5 07:53:02 PST 2025


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/161508

>From 161d07ed945a5715c21556679d0d1c5851a8099f Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 4 Dec 2025 09:37:45 +0000
Subject: [PATCH 1/2] [AArch64] Optimize CBZ wzr and friends.

In certain situations, especially with zero phi operands propagated after tail
duplications, we can end up with CBZ/CBNZ/TBZ/TBNZ with a zero register. These
can be introduced late in the pipeline.

This patch adds a basic pass to fold them away, either replacing them with a
direct branch or removing the instruction entirely. It runs quite late in the
pipeline, so it doesn't fit into any of the existing passes. It only needs to
look at the terminators of each BB, so the new pass should have a limited
compile-time impact.
---
 llvm/lib/Target/AArch64/AArch64.h             |   2 +
 .../AArch64RedundantCondBranchPass.cpp        | 113 ++++++++++++++++++
 .../Target/AArch64/AArch64TargetMachine.cpp   |   3 +
 llvm/lib/Target/AArch64/CMakeLists.txt        |   1 +
 llvm/test/CodeGen/AArch64/O3-pipeline.ll      |   1 +
 llvm/test/CodeGen/AArch64/arm64-rev.ll        |  10 +-
 .../CodeGen/AArch64/arm64-shrink-wrapping.ll  |  18 +--
 .../block-placement-optimize-branches.ll      |  12 +-
 llvm/test/CodeGen/AArch64/cbz_wzr.mir         |  34 ++----
 llvm/test/CodeGen/AArch64/pr164181.ll         |   3 +-
 llvm/test/CodeGen/AArch64/pr166870.ll         |   1 -
 llvm/test/CodeGen/AArch64/tbz-tbnz.ll         |  15 +--
 ...ch64_generated_funcs.ll.generated.expected |   4 +-
 ...64_generated_funcs.ll.nogenerated.expected |   4 +-
 14 files changed, 159 insertions(+), 62 deletions(-)
 create mode 100644 llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp

diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 4ae18385029a3..a8e15c338352a 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -33,6 +33,7 @@ class ModulePass;
 
 FunctionPass *createAArch64DeadRegisterDefinitions();
 FunctionPass *createAArch64RedundantCopyEliminationPass();
+FunctionPass *createAArch64RedundantCondBranchPass();
 FunctionPass *createAArch64CondBrTuning();
 FunctionPass *createAArch64CompressJumpTablesPass();
 FunctionPass *createAArch64ConditionalCompares();
@@ -103,6 +104,7 @@ void initializeAArch64PostSelectOptimizePass(PassRegistry &);
 void initializeAArch64PreLegalizerCombinerPass(PassRegistry &);
 void initializeAArch64PromoteConstantPass(PassRegistry&);
 void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
+void initializeAArch64RedundantCondBranchPass(PassRegistry &);
 void initializeAArch64SIMDInstrOptPass(PassRegistry &);
 void initializeAArch64SLSHardeningPass(PassRegistry &);
 void initializeAArch64SpeculationHardeningPass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp b/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
new file mode 100644
index 0000000000000..1bd47e24809ed
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
@@ -0,0 +1,113 @@
+//=- AArch64RedundantCondBranch.cpp - Remove redundant cbz wzr --------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Late in the pipeline, especially with zero phi operands propagated after tail
+// duplications, we can end up with CBZ/CBNZ/TBZ/TBNZ with a zero register. This
+// simple pass looks at the terminators to a block, removing the redundant
+// instructions where necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-redundantcondbranch"
+
+namespace {
+class AArch64RedundantCondBranch : public MachineFunctionPass {
+public:
+  static char ID;
+  AArch64RedundantCondBranch() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().setNoVRegs();
+  }
+  StringRef getPassName() const override {
+    return "AArch64 Redundant Conditional Branch Elimination";
+  }
+};
+char AArch64RedundantCondBranch::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(AArch64RedundantCondBranch, "aarch64-redundantcondbranch",
+                "AArch64 Redundant Conditional Branch Elimination pass", false,
+                false)
+
+static bool optimizeTerminators(MachineBasicBlock *MBB) {
+  for (MachineInstr &MI : make_early_inc_range(MBB->terminators())) {
+    unsigned Opc = MI.getOpcode();
+    switch (Opc) {
+    case AArch64::CBZW:
+    case AArch64::CBZX:
+    case AArch64::TBZW:
+    case AArch64::TBZX:
+      // CBZ XZR -> B
+      if (MI.getOperand(0).getReg() == AArch64::WZR ||
+          MI.getOperand(0).getReg() == AArch64::XZR) {
+        LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
+        MachineBasicBlock *Target =
+            MI.getOperand(Opc == AArch64::TBZW || Opc == AArch64::TBZX ? 2 : 1)
+                .getMBB();
+        MachineBasicBlock *MBB = MI.getParent();
+        SmallVector<MachineBasicBlock *> Succs(MBB->successors());
+        for (auto *S : Succs)
+          if (S != Target)
+            MBB->removeSuccessor(S);
+        SmallVector<MachineInstr *> DeadInstrs;
+        for (auto It = MI.getIterator(); It != MBB->end(); ++It)
+          DeadInstrs.push_back(&*It);
+        const MachineFunction *MF = MBB->getParent();
+        const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+        BuildMI(MBB, MI.getDebugLoc(), TII->get(AArch64::B)).addMBB(Target);
+        for (auto It : DeadInstrs)
+          It->eraseFromParent();
+        return true;
+      }
+      break;
+    case AArch64::CBNZW:
+    case AArch64::CBNZX:
+    case AArch64::TBNZW:
+    case AArch64::TBNZX:
+      // CBNZ XZR -> nop
+      if (MI.getOperand(0).getReg() == AArch64::WZR ||
+          MI.getOperand(0).getReg() == AArch64::XZR) {
+        LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
+        MachineBasicBlock *Target =
+            MI.getOperand((Opc == AArch64::TBNZW || Opc == AArch64::TBNZX) ? 2
+                                                                           : 1)
+                .getMBB();
+        MI.getParent()->removeSuccessor(Target);
+        MI.eraseFromParent();
+        return true;
+      }
+      break;
+    }
+  }
+  return false;
+}
+
+bool AArch64RedundantCondBranch::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : MF)
+    Changed |= optimizeTerminators(&MBB);
+  return Changed;
+}
+
+FunctionPass *llvm::createAArch64RedundantCondBranchPass() {
+  return new AArch64RedundantCondBranch();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 090789ebb9a4a..346e18e553c5e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -260,6 +260,7 @@ LLVMInitializeAArch64Target() {
   initializeAArch64PostSelectOptimizePass(PR);
   initializeAArch64PromoteConstantPass(PR);
   initializeAArch64RedundantCopyEliminationPass(PR);
+  initializeAArch64RedundantCondBranchPass(PR);
   initializeAArch64StorePairSuppressPass(PR);
   initializeFalkorHWPFFixPass(PR);
   initializeFalkorMarkStridedAccessesLegacyPass(PR);
@@ -862,6 +863,8 @@ void AArch64PassConfig::addPreEmitPass() {
   if (TM->getOptLevel() >= CodeGenOptLevel::Aggressive &&
       EnableAArch64CopyPropagation)
     addPass(createMachineCopyPropagationPass(true));
+  if (TM->getOptLevel() != CodeGenOptLevel::None)
+    addPass(createAArch64RedundantCondBranchPass());
 
   addPass(createAArch64A53Fix835769());
 
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 285d646293eb7..3334b3689e03f 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -61,6 +61,7 @@ add_llvm_target(AArch64CodeGen
   AArch64CompressJumpTables.cpp
   AArch64ConditionOptimizer.cpp
   AArch64RedundantCopyElimination.cpp
+  AArch64RedundantCondBranchPass.cpp
   AArch64ISelDAGToDAG.cpp
   AArch64ISelLowering.cpp
   AArch64InstrInfo.cpp
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 2102029e608ab..e8ea55e027aec 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -229,6 +229,7 @@
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
 ; CHECK-NEXT:       AArch64 load / store optimization pass
 ; CHECK-NEXT:       Machine Copy Propagation Pass
+; CHECK-NEXT:       AArch64 Redundant Conditional Branch Elimination
 ; CHECK-NEXT:       Workaround A53 erratum 835769 pass
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       Remove Loads Into Fake Uses
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 84557b441853a..5980e3214d0da 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -530,28 +530,22 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
 define void @test_rev16_truncstore() {
 ; CHECK-SD-LABEL: test_rev16_truncstore:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    cbnz wzr, .LBB38_2
 ; CHECK-SD-NEXT:  .LBB38_1: // %cleanup
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-SD-NEXT:    ldrh w8, [x8]
 ; CHECK-SD-NEXT:    rev16 w8, w8
 ; CHECK-SD-NEXT:    strh w8, [x8]
-; CHECK-SD-NEXT:    cbz wzr, .LBB38_1
-; CHECK-SD-NEXT:  .LBB38_2: // %fail
-; CHECK-SD-NEXT:    ret
+; CHECK-SD-NEXT:    b .LBB38_1
 ;
 ; CHECK-GI-LABEL: test_rev16_truncstore:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    tbnz wzr, #0, .LBB38_2
 ; CHECK-GI-NEXT:  .LBB38_1: // %cleanup
 ; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-GI-NEXT:    ldrh w8, [x8]
 ; CHECK-GI-NEXT:    rev w8, w8
 ; CHECK-GI-NEXT:    lsr w8, w8, #16
 ; CHECK-GI-NEXT:    strh w8, [x8]
-; CHECK-GI-NEXT:    tbz wzr, #0, .LBB38_1
-; CHECK-GI-NEXT:  .LBB38_2: // %fail
-; CHECK-GI-NEXT:    ret
+; CHECK-GI-NEXT:    b .LBB38_1
 entry:
   br label %body
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index 5806bcf0dacf1..724c8b3fc9170 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -735,7 +735,6 @@ define void @infiniteloop() {
 ; ENABLE-NEXT:    .cfi_offset w29, -16
 ; ENABLE-NEXT:    .cfi_offset w19, -24
 ; ENABLE-NEXT:    .cfi_offset w20, -32
-; ENABLE-NEXT:    cbnz wzr, LBB10_3
 ; ENABLE-NEXT:  ; %bb.1: ; %if.then
 ; ENABLE-NEXT:    sub x19, sp, #16
 ; ENABLE-NEXT:    mov sp, x19
@@ -746,7 +745,7 @@ define void @infiniteloop() {
 ; ENABLE-NEXT:    add w20, w0, w20
 ; ENABLE-NEXT:    str w20, [x19]
 ; ENABLE-NEXT:    b LBB10_2
-; ENABLE-NEXT:  LBB10_3: ; %if.end
+; ENABLE-NEXT:  ; %bb.3: ; %if.end
 ; ENABLE-NEXT:    sub sp, x29, #16
 ; ENABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; ENABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
@@ -762,7 +761,6 @@ define void @infiniteloop() {
 ; DISABLE-NEXT:    .cfi_offset w29, -16
 ; DISABLE-NEXT:    .cfi_offset w19, -24
 ; DISABLE-NEXT:    .cfi_offset w20, -32
-; DISABLE-NEXT:    cbnz wzr, LBB10_3
 ; DISABLE-NEXT:  ; %bb.1: ; %if.then
 ; DISABLE-NEXT:    sub x19, sp, #16
 ; DISABLE-NEXT:    mov sp, x19
@@ -773,7 +771,7 @@ define void @infiniteloop() {
 ; DISABLE-NEXT:    add w20, w0, w20
 ; DISABLE-NEXT:    str w20, [x19]
 ; DISABLE-NEXT:    b LBB10_2
-; DISABLE-NEXT:  LBB10_3: ; %if.end
+; DISABLE-NEXT:  ; %bb.3: ; %if.end
 ; DISABLE-NEXT:    sub sp, x29, #16
 ; DISABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; DISABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
@@ -808,7 +806,6 @@ define void @infiniteloop2() {
 ; ENABLE-NEXT:    .cfi_offset w29, -16
 ; ENABLE-NEXT:    .cfi_offset w19, -24
 ; ENABLE-NEXT:    .cfi_offset w20, -32
-; ENABLE-NEXT:    cbnz wzr, LBB11_3
 ; ENABLE-NEXT:  ; %bb.1: ; %if.then
 ; ENABLE-NEXT:    sub x8, sp, #16
 ; ENABLE-NEXT:    mov sp, x8
@@ -825,7 +822,7 @@ define void @infiniteloop2() {
 ; ENABLE-NEXT:    nop
 ; ENABLE-NEXT:    ; InlineAsm End
 ; ENABLE-NEXT:    b LBB11_2
-; ENABLE-NEXT:  LBB11_3: ; %if.end
+; ENABLE-NEXT:  ; %bb.3: ; %if.end
 ; ENABLE-NEXT:    sub sp, x29, #16
 ; ENABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; ENABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
@@ -841,7 +838,6 @@ define void @infiniteloop2() {
 ; DISABLE-NEXT:    .cfi_offset w29, -16
 ; DISABLE-NEXT:    .cfi_offset w19, -24
 ; DISABLE-NEXT:    .cfi_offset w20, -32
-; DISABLE-NEXT:    cbnz wzr, LBB11_3
 ; DISABLE-NEXT:  ; %bb.1: ; %if.then
 ; DISABLE-NEXT:    sub x8, sp, #16
 ; DISABLE-NEXT:    mov sp, x8
@@ -858,7 +854,7 @@ define void @infiniteloop2() {
 ; DISABLE-NEXT:    nop
 ; DISABLE-NEXT:    ; InlineAsm End
 ; DISABLE-NEXT:    b LBB11_2
-; DISABLE-NEXT:  LBB11_3: ; %if.end
+; DISABLE-NEXT:  ; %bb.3: ; %if.end
 ; DISABLE-NEXT:    sub sp, x29, #16
 ; DISABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; DISABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
@@ -893,7 +889,6 @@ if.end:
 define void @infiniteloop3() {
 ; ENABLE-LABEL: infiniteloop3:
 ; ENABLE:       ; %bb.0: ; %entry
-; ENABLE-NEXT:    cbnz wzr, LBB12_5
 ; ENABLE-NEXT:  ; %bb.1: ; %loop2a.preheader
 ; ENABLE-NEXT:    mov x8, xzr
 ; ENABLE-NEXT:    mov x9, xzr
@@ -912,12 +907,11 @@ define void @infiniteloop3() {
 ; ENABLE-NEXT:    mov x8, x10
 ; ENABLE-NEXT:    mov x11, x10
 ; ENABLE-NEXT:    b LBB12_3
-; ENABLE-NEXT:  LBB12_5: ; %end
+; ENABLE-NEXT:  ; %bb.5: ; %end
 ; ENABLE-NEXT:    ret
 ;
 ; DISABLE-LABEL: infiniteloop3:
 ; DISABLE:       ; %bb.0: ; %entry
-; DISABLE-NEXT:    cbnz wzr, LBB12_5
 ; DISABLE-NEXT:  ; %bb.1: ; %loop2a.preheader
 ; DISABLE-NEXT:    mov x8, xzr
 ; DISABLE-NEXT:    mov x9, xzr
@@ -936,7 +930,7 @@ define void @infiniteloop3() {
 ; DISABLE-NEXT:    mov x8, x10
 ; DISABLE-NEXT:    mov x11, x10
 ; DISABLE-NEXT:    b LBB12_3
-; DISABLE-NEXT:  LBB12_5: ; %end
+; DISABLE-NEXT:  ; %bb.5: ; %end
 ; DISABLE-NEXT:    ret
 entry:
   br i1 undef, label %loop2a, label %body
diff --git a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
index 3645718968f9e..7c3a567d1b336 100644
--- a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
+++ b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
@@ -8,21 +8,20 @@
 define i8 @foo_optsize(i32 %v4) optsize {
 ; CHECK-LABEL: foo_optsize:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz wzr, .LBB0_2
+; CHECK-NEXT:    b .LBB0_2
 ; CHECK-NEXT:  .LBB0_1:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_2: // %b1
 ; CHECK-NEXT:    cbnz w0, .LBB0_4
-; CHECK-NEXT:  .LBB0_3: // %b2
+; CHECK-NEXT:  // %bb.3: // %b2
 ; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_4: // %b1
 ; CHECK-NEXT:    cmp w0, #1
 ; CHECK-NEXT:    b.ne .LBB0_1
 ; CHECK-NEXT:  // %bb.5: // %b3
-; CHECK-NEXT:    cbz wzr, .LBB0_1
-; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:    b .LBB0_1
 entry:
   %v2 = icmp eq i32 0, 0
   br i1 %v2, label %b1, label %b4
@@ -48,20 +47,19 @@ b4:
 define i8 @foo_optspeed(i32 %v4) {
 ; CHECK-LABEL: foo_optspeed:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz wzr, .LBB1_2
+; CHECK-NEXT:    b .LBB1_2
 ; CHECK-NEXT:  .LBB1_1:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB1_2: // %b1
 ; CHECK-NEXT:    cbnz w0, .LBB1_4
-; CHECK-NEXT:  .LBB1_3: // %b2
+; CHECK-NEXT:  // %bb.3: // %b2
 ; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB1_4: // %b1
 ; CHECK-NEXT:    cmp w0, #1
 ; CHECK-NEXT:    b.ne .LBB1_1
 ; CHECK-NEXT:  // %bb.5: // %b3
-; CHECK-NEXT:    cbnz wzr, .LBB1_3
 ; CHECK-NEXT:    b .LBB1_1
 entry:
   %v2 = icmp eq i32 0, 0
diff --git a/llvm/test/CodeGen/AArch64/cbz_wzr.mir b/llvm/test/CodeGen/AArch64/cbz_wzr.mir
index 7deea56ba23a1..9e70d2a12b0fd 100644
--- a/llvm/test/CodeGen/AArch64/cbz_wzr.mir
+++ b/llvm/test/CodeGen/AArch64/cbz_wzr.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
-# RUN: llc -o - %s -mtriple=aarch64-none-eabi -run-pass=machine-cp -mcp-use-is-copy-instr | FileCheck %s
+# RUN: llc -o - %s -mtriple=aarch64-none-eabi -run-pass=machine-cp,aarch64-redundantcondbranch -mcp-use-is-copy-instr | FileCheck %s
 
 ---
 name:            cbz_wzr
@@ -7,10 +7,10 @@ tracksRegLiveness: true
 body:             |
   ; CHECK-LABEL: name: cbz_wzr
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   CBZW $wzr, %bb.2
+  ; CHECK-NEXT:   B %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
@@ -39,11 +39,9 @@ tracksRegLiveness: true
 body:             |
   ; CHECK-LABEL: name: cbnz_wzr
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   CBNZW $wzr, %bb.2
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
@@ -71,10 +69,10 @@ tracksRegLiveness: true
 body:             |
   ; CHECK-LABEL: name: tbz_wzr
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   TBZW $wzr, 0, %bb.2
+  ; CHECK-NEXT:   B %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
@@ -103,11 +101,9 @@ tracksRegLiveness: true
 body:             |
   ; CHECK-LABEL: name: tbnz_wzr
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   TBNZW $wzr, 0, %bb.2
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
@@ -136,10 +132,10 @@ tracksRegLiveness: true
 body:             |
   ; CHECK-LABEL: name: cbz_xzr
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   CBZX $xzr, %bb.2
+  ; CHECK-NEXT:   B %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
@@ -168,11 +164,9 @@ tracksRegLiveness: true
 body:             |
   ; CHECK-LABEL: name: cbnz_xzr
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   CBNZX $xzr, %bb.2
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
@@ -200,10 +194,10 @@ tracksRegLiveness: true
 body:             |
   ; CHECK-LABEL: name: tbz_xzr
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   TBZX $xzr, 0, %bb.2
+  ; CHECK-NEXT:   B %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
@@ -232,11 +226,9 @@ tracksRegLiveness: true
 body:             |
   ; CHECK-LABEL: name: tbnz_xzr
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   TBNZX $xzr, 0, %bb.2
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/pr164181.ll b/llvm/test/CodeGen/AArch64/pr164181.ll
index 18732ae5ae300..abb090ccbcaed 100644
--- a/llvm/test/CodeGen/AArch64/pr164181.ll
+++ b/llvm/test/CodeGen/AArch64/pr164181.ll
@@ -288,7 +288,6 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
 ; CHECK-NEXT:    // Parent Loop BB0_8 Depth=3
 ; CHECK-NEXT:    // Parent Loop BB0_10 Depth=4
 ; CHECK-NEXT:    // => This Inner Loop Header: Depth=5
-; CHECK-NEXT:    cbnz wzr, .LBB0_30
 ; CHECK-NEXT:  // %bb.29: // %if.then222.us
 ; CHECK-NEXT:    // in Loop: Header=BB0_28 Depth=5
 ; CHECK-NEXT:    adrp x27, :got:var_32
@@ -299,7 +298,7 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
 ; CHECK-NEXT:    bic w25, w8, w8, asr #31
 ; CHECK-NEXT:    b .LBB0_31
 ; CHECK-NEXT:    .p2align 5, , 16
-; CHECK-NEXT:  .LBB0_30: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT:  // %bb.30:
 ; CHECK-NEXT:    mov w25, wzr
 ; CHECK-NEXT:  .LBB0_31: // %if.end239.us
 ; CHECK-NEXT:    // in Loop: Header=BB0_28 Depth=5
diff --git a/llvm/test/CodeGen/AArch64/pr166870.ll b/llvm/test/CodeGen/AArch64/pr166870.ll
index d6f99c67a01ff..48967d30485ab 100644
--- a/llvm/test/CodeGen/AArch64/pr166870.ll
+++ b/llvm/test/CodeGen/AArch64/pr166870.ll
@@ -26,7 +26,6 @@ define i32 @widget(i32 %arg, i32 %arg1, i1 %arg2, ptr %arg3, i1 %arg4) #0 nounwi
 ; CHECK-NEXT:    mov x21, x1
 ; CHECK-NEXT:    bl baz
 ; CHECK-NEXT:    mov w0, #0 // =0x0
-; CHECK-NEXT:    cbnz wzr, .LBB0_11
 ; CHECK-NEXT:  // %bb.5: // %bb6
 ; CHECK-NEXT:    mov w10, #1 // =0x1
 ; CHECK-NEXT:    cbnz w10, .LBB0_11
diff --git a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
index 6946cc23d867d..72e3a2ef59677 100644
--- a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
+++ b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
@@ -586,13 +586,12 @@ define ptr @tbnz_wzr(i1 %cmp1.not.i, ptr %locflg) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    tbz w0, #0, .LBB20_2
 ; CHECK-SD-NEXT:  // %bb.1:
-; CHECK-SD-NEXT:    tbnz wzr, #0, .LBB20_3
 ; CHECK-SD-NEXT:    b .LBB20_4
 ; CHECK-SD-NEXT:  .LBB20_2: // %opnfil.exit.thread
 ; CHECK-SD-NEXT:    mov w8, #1 // =0x1
 ; CHECK-SD-NEXT:    str wzr, [x1]
 ; CHECK-SD-NEXT:    tbz w8, #0, .LBB20_4
-; CHECK-SD-NEXT:  .LBB20_3: // %if.else25
+; CHECK-SD-NEXT:  // %bb.3: // %if.else25
 ; CHECK-SD-NEXT:    str wzr, [x1]
 ; CHECK-SD-NEXT:  .LBB20_4: // %common.ret
 ; CHECK-SD-NEXT:    mov x0, xzr
@@ -656,7 +655,7 @@ define ptr @tbz_wzr(i1 %cmp1.not.i, ptr %locflg) {
 ; CHECK-SD-NEXT:    b .LBB21_4
 ; CHECK-SD-NEXT:  .LBB21_2: // %opnfil.exit.thread
 ; CHECK-SD-NEXT:    str wzr, [x1]
-; CHECK-SD-NEXT:    tbz wzr, #0, .LBB21_4
+; CHECK-SD-NEXT:    b .LBB21_4
 ; CHECK-SD-NEXT:  .LBB21_3: // %if.else25
 ; CHECK-SD-NEXT:    str wzr, [x1]
 ; CHECK-SD-NEXT:  .LBB21_4: // %common.ret
@@ -716,13 +715,12 @@ define ptr @cbnz_wzr(i1 %cmp1.not.i, ptr %locflg) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    tbz w0, #0, .LBB22_2
 ; CHECK-SD-NEXT:  // %bb.1:
-; CHECK-SD-NEXT:    cbnz wzr, .LBB22_3
 ; CHECK-SD-NEXT:    b .LBB22_4
 ; CHECK-SD-NEXT:  .LBB22_2: // %opnfil.exit.thread
 ; CHECK-SD-NEXT:    mov w8, #10 // =0xa
 ; CHECK-SD-NEXT:    str wzr, [x1]
 ; CHECK-SD-NEXT:    cbz w8, .LBB22_4
-; CHECK-SD-NEXT:  .LBB22_3: // %if.else25
+; CHECK-SD-NEXT:  // %bb.3: // %if.else25
 ; CHECK-SD-NEXT:    str wzr, [x1]
 ; CHECK-SD-NEXT:  .LBB22_4: // %common.ret
 ; CHECK-SD-NEXT:    mov x0, xzr
@@ -784,7 +782,7 @@ define ptr @cbz_wzr(i1 %cmp1.not.i, ptr %locflg) {
 ; CHECK-SD-NEXT:    b .LBB23_4
 ; CHECK-SD-NEXT:  .LBB23_2: // %opnfil.exit.thread
 ; CHECK-SD-NEXT:    str wzr, [x1]
-; CHECK-SD-NEXT:    cbz wzr, .LBB23_4
+; CHECK-SD-NEXT:    b .LBB23_4
 ; CHECK-SD-NEXT:  .LBB23_3: // %if.else25
 ; CHECK-SD-NEXT:    str wzr, [x1]
 ; CHECK-SD-NEXT:  .LBB23_4: // %common.ret
@@ -848,12 +846,11 @@ define i1 @avifSequenceHeaderParse() {
 ; CHECK-SD-NEXT:  .LBB24_2: // %bb1
 ; CHECK-SD-NEXT:    cbz w8, .LBB24_4
 ; CHECK-SD-NEXT:  // %bb.3:
-; CHECK-SD-NEXT:    tbz xzr, #63, .LBB24_1
-; CHECK-SD-NEXT:    b .LBB24_5
+; CHECK-SD-NEXT:    b .LBB24_1
 ; CHECK-SD-NEXT:  .LBB24_4: // %bb2
 ; CHECK-SD-NEXT:    mov w8, #1 // =0x1
 ; CHECK-SD-NEXT:    tbz x8, #63, .LBB24_1
-; CHECK-SD-NEXT:  .LBB24_5: // %bb4
+; CHECK-SD-NEXT:  // %bb.5: // %bb4
 ; CHECK-SD-NEXT:    mov w8, #1 // =0x1
 ; CHECK-SD-NEXT:    mov w0, wzr
 ; CHECK-SD-NEXT:    ret
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
index 97b17d98d3472..f4815dc331056 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
@@ -71,14 +71,16 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_remember_state
 ; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    stur xzr, [x29, #-8]
-; CHECK-NEXT:    cbz wzr, .LBB0_3
+; CHECK-NEXT:    b .LBB0_3
 ; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:    str w8, [sp, #16]
 ; CHECK-NEXT:    ldur w8, [x29, #-8]
 ; CHECK-NEXT:    cbz w8, .LBB0_4
 ; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    .cfi_restore_state
 ; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    str w8, [sp, #16]
 ; CHECK-NEXT:    b .LBB0_5
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
index 3d379ea1faf5f..bc3500522672d 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
@@ -12,14 +12,16 @@ define dso_local i32 @check_boundaries() #0 {
 ; CHECK-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_remember_state
 ; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    stur xzr, [x29, #-8]
-; CHECK-NEXT:    cbz wzr, .LBB0_3
+; CHECK-NEXT:    b .LBB0_3
 ; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:    str w8, [sp, #16]
 ; CHECK-NEXT:    ldur w8, [x29, #-8]
 ; CHECK-NEXT:    cbz w8, .LBB0_4
 ; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    .cfi_restore_state
 ; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    str w8, [sp, #16]
 ; CHECK-NEXT:    b .LBB0_5

>From 6af5310b16775580da5aee26849d7be481ebfed3 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 5 Dec 2025 15:52:15 +0000
Subject: [PATCH 2/2] Address comments and cleanup

---
 .../AArch64RedundantCondBranchPass.cpp        | 36 ++++++++-----------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp b/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
index 1bd47e24809ed..1b990796ec9da 100644
--- a/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
@@ -1,4 +1,4 @@
-//=- AArch64RedundantCondBranch.cpp - Remove redundant cbz wzr --------------=//
+//=- AArch64RedundantCondBranch.cpp - Remove redundant conditional branches -=//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -45,7 +45,8 @@ INITIALIZE_PASS(AArch64RedundantCondBranch, "aarch64-redundantcondbranch",
                 "AArch64 Redundant Conditional Branch Elimination pass", false,
                 false)
 
-static bool optimizeTerminators(MachineBasicBlock *MBB) {
+static bool optimizeTerminators(MachineBasicBlock *MBB,
+                                const TargetInstrInfo &TII) {
   for (MachineInstr &MI : make_early_inc_range(MBB->terminators())) {
     unsigned Opc = MI.getOpcode();
     switch (Opc) {
@@ -53,26 +54,20 @@ static bool optimizeTerminators(MachineBasicBlock *MBB) {
     case AArch64::CBZX:
     case AArch64::TBZW:
     case AArch64::TBZX:
-      // CBZ XZR -> B
+      // CBZ/TBZ with WZR/XZR -> unconditional B
       if (MI.getOperand(0).getReg() == AArch64::WZR ||
           MI.getOperand(0).getReg() == AArch64::XZR) {
         LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
-        MachineBasicBlock *Target =
-            MI.getOperand(Opc == AArch64::TBZW || Opc == AArch64::TBZX ? 2 : 1)
-                .getMBB();
-        MachineBasicBlock *MBB = MI.getParent();
+        MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
         SmallVector<MachineBasicBlock *> Succs(MBB->successors());
         for (auto *S : Succs)
           if (S != Target)
             MBB->removeSuccessor(S);
-        SmallVector<MachineInstr *> DeadInstrs;
-        for (auto It = MI.getIterator(); It != MBB->end(); ++It)
-          DeadInstrs.push_back(&*It);
-        const MachineFunction *MF = MBB->getParent();
-        const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-        BuildMI(MBB, MI.getDebugLoc(), TII->get(AArch64::B)).addMBB(Target);
-        for (auto It : DeadInstrs)
-          It->eraseFromParent();
+        DebugLoc DL = MI.getDebugLoc();
+        while (MBB->rbegin() != &MI)
+          MBB->rbegin()->eraseFromParent();
+        MI.eraseFromParent();
+        BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
         return true;
       }
       break;
@@ -80,14 +75,11 @@ static bool optimizeTerminators(MachineBasicBlock *MBB) {
     case AArch64::CBNZX:
     case AArch64::TBNZW:
     case AArch64::TBNZX:
-      // CBNZ XZR -> nop
+      // CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
       if (MI.getOperand(0).getReg() == AArch64::WZR ||
           MI.getOperand(0).getReg() == AArch64::XZR) {
         LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
-        MachineBasicBlock *Target =
-            MI.getOperand((Opc == AArch64::TBNZW || Opc == AArch64::TBNZX) ? 2
-                                                                           : 1)
-                .getMBB();
+        MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
         MI.getParent()->removeSuccessor(Target);
         MI.eraseFromParent();
         return true;
@@ -102,9 +94,11 @@ bool AArch64RedundantCondBranch::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
   bool Changed = false;
   for (MachineBasicBlock &MBB : MF)
-    Changed |= optimizeTerminators(&MBB);
+    Changed |= optimizeTerminators(&MBB, TII);
   return Changed;
 }
 



More information about the llvm-commits mailing list