[llvm] [AArch64] Optimize CBZ wzr and friends. (PR #161508)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 07:53:02 PST 2025
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/161508
>From 161d07ed945a5715c21556679d0d1c5851a8099f Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 4 Dec 2025 09:37:45 +0000
Subject: [PATCH 1/2] [AArch64] Optimize CBZ wzr and friends.
In certain situations, especially with zero phi operands propagated after tail
duplications, we can end up with CBZ/CBNZ/TBZ/TBNZ with a zero register. It
can be introduced late in the pipeline.
This patch adds a basic pass to fold them away to either a direct branch or
removing the instruction entirely. It runs quite late in the pipeline, so doesn't
fit into any of the existing passes. It only needs to look at the terminators
to each BB, so the new pass should have a limited compile-time impact.
---
llvm/lib/Target/AArch64/AArch64.h | 2 +
.../AArch64RedundantCondBranchPass.cpp | 113 ++++++++++++++++++
.../Target/AArch64/AArch64TargetMachine.cpp | 3 +
llvm/lib/Target/AArch64/CMakeLists.txt | 1 +
llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 +
llvm/test/CodeGen/AArch64/arm64-rev.ll | 10 +-
.../CodeGen/AArch64/arm64-shrink-wrapping.ll | 18 +--
.../block-placement-optimize-branches.ll | 12 +-
llvm/test/CodeGen/AArch64/cbz_wzr.mir | 34 ++----
llvm/test/CodeGen/AArch64/pr164181.ll | 3 +-
llvm/test/CodeGen/AArch64/pr166870.ll | 1 -
llvm/test/CodeGen/AArch64/tbz-tbnz.ll | 15 +--
...ch64_generated_funcs.ll.generated.expected | 4 +-
...64_generated_funcs.ll.nogenerated.expected | 4 +-
14 files changed, 159 insertions(+), 62 deletions(-)
create mode 100644 llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 4ae18385029a3..a8e15c338352a 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -33,6 +33,7 @@ class ModulePass;
FunctionPass *createAArch64DeadRegisterDefinitions();
FunctionPass *createAArch64RedundantCopyEliminationPass();
+FunctionPass *createAArch64RedundantCondBranchPass();
FunctionPass *createAArch64CondBrTuning();
FunctionPass *createAArch64CompressJumpTablesPass();
FunctionPass *createAArch64ConditionalCompares();
@@ -103,6 +104,7 @@ void initializeAArch64PostSelectOptimizePass(PassRegistry &);
void initializeAArch64PreLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
+void initializeAArch64RedundantCondBranchPass(PassRegistry &);
void initializeAArch64SIMDInstrOptPass(PassRegistry &);
void initializeAArch64SLSHardeningPass(PassRegistry &);
void initializeAArch64SpeculationHardeningPass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp b/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
new file mode 100644
index 0000000000000..1bd47e24809ed
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
@@ -0,0 +1,113 @@
+//=- AArch64RedundantCondBranch.cpp - Remove redundant cbz wzr --------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Late in the pipeline, especially with zero phi operands propagated after tail
+// duplications, we can end up with CBZ/CBNZ/TBZ/TBNZ with a zero register. This
+// simple pass looks at the terminators to a block, removing the redundant
+// instructions where necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-redundantcondbranch"
+
+namespace {
+class AArch64RedundantCondBranch : public MachineFunctionPass {
+public:
+ static char ID;
+ AArch64RedundantCondBranch() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().setNoVRegs();
+ }
+ StringRef getPassName() const override {
+ return "AArch64 Redundant Conditional Branch Elimination";
+ }
+};
+char AArch64RedundantCondBranch::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(AArch64RedundantCondBranch, "aarch64-redundantcondbranch",
+ "AArch64 Redundant Conditional Branch Elimination pass", false,
+ false)
+
+static bool optimizeTerminators(MachineBasicBlock *MBB) {
+ for (MachineInstr &MI : make_early_inc_range(MBB->terminators())) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ case AArch64::TBZW:
+ case AArch64::TBZX:
+ // CBZ XZR -> B
+ if (MI.getOperand(0).getReg() == AArch64::WZR ||
+ MI.getOperand(0).getReg() == AArch64::XZR) {
+ LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
+ MachineBasicBlock *Target =
+ MI.getOperand(Opc == AArch64::TBZW || Opc == AArch64::TBZX ? 2 : 1)
+ .getMBB();
+ MachineBasicBlock *MBB = MI.getParent();
+ SmallVector<MachineBasicBlock *> Succs(MBB->successors());
+ for (auto *S : Succs)
+ if (S != Target)
+ MBB->removeSuccessor(S);
+ SmallVector<MachineInstr *> DeadInstrs;
+ for (auto It = MI.getIterator(); It != MBB->end(); ++It)
+ DeadInstrs.push_back(&*It);
+ const MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ BuildMI(MBB, MI.getDebugLoc(), TII->get(AArch64::B)).addMBB(Target);
+ for (auto It : DeadInstrs)
+ It->eraseFromParent();
+ return true;
+ }
+ break;
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ case AArch64::TBNZW:
+ case AArch64::TBNZX:
+ // CBNZ XZR -> nop
+ if (MI.getOperand(0).getReg() == AArch64::WZR ||
+ MI.getOperand(0).getReg() == AArch64::XZR) {
+ LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
+ MachineBasicBlock *Target =
+ MI.getOperand((Opc == AArch64::TBNZW || Opc == AArch64::TBNZX) ? 2
+ : 1)
+ .getMBB();
+ MI.getParent()->removeSuccessor(Target);
+ MI.eraseFromParent();
+ return true;
+ }
+ break;
+ }
+ }
+ return false;
+}
+
+bool AArch64RedundantCondBranch::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF)
+ Changed |= optimizeTerminators(&MBB);
+ return Changed;
+}
+
+FunctionPass *llvm::createAArch64RedundantCondBranchPass() {
+ return new AArch64RedundantCondBranch();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 090789ebb9a4a..346e18e553c5e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -260,6 +260,7 @@ LLVMInitializeAArch64Target() {
initializeAArch64PostSelectOptimizePass(PR);
initializeAArch64PromoteConstantPass(PR);
initializeAArch64RedundantCopyEliminationPass(PR);
+ initializeAArch64RedundantCondBranchPass(PR);
initializeAArch64StorePairSuppressPass(PR);
initializeFalkorHWPFFixPass(PR);
initializeFalkorMarkStridedAccessesLegacyPass(PR);
@@ -862,6 +863,8 @@ void AArch64PassConfig::addPreEmitPass() {
if (TM->getOptLevel() >= CodeGenOptLevel::Aggressive &&
EnableAArch64CopyPropagation)
addPass(createMachineCopyPropagationPass(true));
+ if (TM->getOptLevel() != CodeGenOptLevel::None)
+ addPass(createAArch64RedundantCondBranchPass());
addPass(createAArch64A53Fix835769());
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 285d646293eb7..3334b3689e03f 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -61,6 +61,7 @@ add_llvm_target(AArch64CodeGen
AArch64CompressJumpTables.cpp
AArch64ConditionOptimizer.cpp
AArch64RedundantCopyElimination.cpp
+ AArch64RedundantCondBranchPass.cpp
AArch64ISelDAGToDAG.cpp
AArch64ISelLowering.cpp
AArch64InstrInfo.cpp
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 2102029e608ab..e8ea55e027aec 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -229,6 +229,7 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: AArch64 load / store optimization pass
; CHECK-NEXT: Machine Copy Propagation Pass
+; CHECK-NEXT: AArch64 Redundant Conditional Branch Elimination
; CHECK-NEXT: Workaround A53 erratum 835769 pass
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 84557b441853a..5980e3214d0da 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -530,28 +530,22 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
define void @test_rev16_truncstore() {
; CHECK-SD-LABEL: test_rev16_truncstore:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cbnz wzr, .LBB38_2
; CHECK-SD-NEXT: .LBB38_1: // %cleanup
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-SD-NEXT: ldrh w8, [x8]
; CHECK-SD-NEXT: rev16 w8, w8
; CHECK-SD-NEXT: strh w8, [x8]
-; CHECK-SD-NEXT: cbz wzr, .LBB38_1
-; CHECK-SD-NEXT: .LBB38_2: // %fail
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: b .LBB38_1
;
; CHECK-GI-LABEL: test_rev16_truncstore:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: tbnz wzr, #0, .LBB38_2
; CHECK-GI-NEXT: .LBB38_1: // %cleanup
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrh w8, [x8]
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: lsr w8, w8, #16
; CHECK-GI-NEXT: strh w8, [x8]
-; CHECK-GI-NEXT: tbz wzr, #0, .LBB38_1
-; CHECK-GI-NEXT: .LBB38_2: // %fail
-; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: b .LBB38_1
entry:
br label %body
diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index 5806bcf0dacf1..724c8b3fc9170 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -735,7 +735,6 @@ define void @infiniteloop() {
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
-; ENABLE-NEXT: cbnz wzr, LBB10_3
; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub x19, sp, #16
; ENABLE-NEXT: mov sp, x19
@@ -746,7 +745,7 @@ define void @infiniteloop() {
; ENABLE-NEXT: add w20, w0, w20
; ENABLE-NEXT: str w20, [x19]
; ENABLE-NEXT: b LBB10_2
-; ENABLE-NEXT: LBB10_3: ; %if.end
+; ENABLE-NEXT: ; %bb.3: ; %if.end
; ENABLE-NEXT: sub sp, x29, #16
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
@@ -762,7 +761,6 @@ define void @infiniteloop() {
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
-; DISABLE-NEXT: cbnz wzr, LBB10_3
; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: sub x19, sp, #16
; DISABLE-NEXT: mov sp, x19
@@ -773,7 +771,7 @@ define void @infiniteloop() {
; DISABLE-NEXT: add w20, w0, w20
; DISABLE-NEXT: str w20, [x19]
; DISABLE-NEXT: b LBB10_2
-; DISABLE-NEXT: LBB10_3: ; %if.end
+; DISABLE-NEXT: ; %bb.3: ; %if.end
; DISABLE-NEXT: sub sp, x29, #16
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
@@ -808,7 +806,6 @@ define void @infiniteloop2() {
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
-; ENABLE-NEXT: cbnz wzr, LBB11_3
; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub x8, sp, #16
; ENABLE-NEXT: mov sp, x8
@@ -825,7 +822,7 @@ define void @infiniteloop2() {
; ENABLE-NEXT: nop
; ENABLE-NEXT: ; InlineAsm End
; ENABLE-NEXT: b LBB11_2
-; ENABLE-NEXT: LBB11_3: ; %if.end
+; ENABLE-NEXT: ; %bb.3: ; %if.end
; ENABLE-NEXT: sub sp, x29, #16
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
@@ -841,7 +838,6 @@ define void @infiniteloop2() {
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
-; DISABLE-NEXT: cbnz wzr, LBB11_3
; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: sub x8, sp, #16
; DISABLE-NEXT: mov sp, x8
@@ -858,7 +854,7 @@ define void @infiniteloop2() {
; DISABLE-NEXT: nop
; DISABLE-NEXT: ; InlineAsm End
; DISABLE-NEXT: b LBB11_2
-; DISABLE-NEXT: LBB11_3: ; %if.end
+; DISABLE-NEXT: ; %bb.3: ; %if.end
; DISABLE-NEXT: sub sp, x29, #16
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
@@ -893,7 +889,6 @@ if.end:
define void @infiniteloop3() {
; ENABLE-LABEL: infiniteloop3:
; ENABLE: ; %bb.0: ; %entry
-; ENABLE-NEXT: cbnz wzr, LBB12_5
; ENABLE-NEXT: ; %bb.1: ; %loop2a.preheader
; ENABLE-NEXT: mov x8, xzr
; ENABLE-NEXT: mov x9, xzr
@@ -912,12 +907,11 @@ define void @infiniteloop3() {
; ENABLE-NEXT: mov x8, x10
; ENABLE-NEXT: mov x11, x10
; ENABLE-NEXT: b LBB12_3
-; ENABLE-NEXT: LBB12_5: ; %end
+; ENABLE-NEXT: ; %bb.5: ; %end
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: infiniteloop3:
; DISABLE: ; %bb.0: ; %entry
-; DISABLE-NEXT: cbnz wzr, LBB12_5
; DISABLE-NEXT: ; %bb.1: ; %loop2a.preheader
; DISABLE-NEXT: mov x8, xzr
; DISABLE-NEXT: mov x9, xzr
@@ -936,7 +930,7 @@ define void @infiniteloop3() {
; DISABLE-NEXT: mov x8, x10
; DISABLE-NEXT: mov x11, x10
; DISABLE-NEXT: b LBB12_3
-; DISABLE-NEXT: LBB12_5: ; %end
+; DISABLE-NEXT: ; %bb.5: ; %end
; DISABLE-NEXT: ret
entry:
br i1 undef, label %loop2a, label %body
diff --git a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
index 3645718968f9e..7c3a567d1b336 100644
--- a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
+++ b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
@@ -8,21 +8,20 @@
define i8 @foo_optsize(i32 %v4) optsize {
; CHECK-LABEL: foo_optsize:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cbz wzr, .LBB0_2
+; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: // %b1
; CHECK-NEXT: cbnz w0, .LBB0_4
-; CHECK-NEXT: .LBB0_3: // %b2
+; CHECK-NEXT: // %bb.3: // %b2
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_4: // %b1
; CHECK-NEXT: cmp w0, #1
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.5: // %b3
-; CHECK-NEXT: cbz wzr, .LBB0_1
-; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: b .LBB0_1
entry:
%v2 = icmp eq i32 0, 0
br i1 %v2, label %b1, label %b4
@@ -48,20 +47,19 @@ b4:
define i8 @foo_optspeed(i32 %v4) {
; CHECK-LABEL: foo_optspeed:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cbz wzr, .LBB1_2
+; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_2: // %b1
; CHECK-NEXT: cbnz w0, .LBB1_4
-; CHECK-NEXT: .LBB1_3: // %b2
+; CHECK-NEXT: // %bb.3: // %b2
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_4: // %b1
; CHECK-NEXT: cmp w0, #1
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.5: // %b3
-; CHECK-NEXT: cbnz wzr, .LBB1_3
; CHECK-NEXT: b .LBB1_1
entry:
%v2 = icmp eq i32 0, 0
diff --git a/llvm/test/CodeGen/AArch64/cbz_wzr.mir b/llvm/test/CodeGen/AArch64/cbz_wzr.mir
index 7deea56ba23a1..9e70d2a12b0fd 100644
--- a/llvm/test/CodeGen/AArch64/cbz_wzr.mir
+++ b/llvm/test/CodeGen/AArch64/cbz_wzr.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
-# RUN: llc -o - %s -mtriple=aarch64-none-eabi -run-pass=machine-cp -mcp-use-is-copy-instr | FileCheck %s
+# RUN: llc -o - %s -mtriple=aarch64-none-eabi -run-pass=machine-cp,aarch64-redundantcondbranch -mcp-use-is-copy-instr | FileCheck %s
---
name: cbz_wzr
@@ -7,10 +7,10 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: cbz_wzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.2(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: CBZW $wzr, %bb.2
+ ; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
@@ -39,11 +39,9 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: cbnz_wzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.1(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: CBNZW $wzr, %bb.2
- ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
; CHECK-NEXT: RET undef $lr, implicit $w0
@@ -71,10 +69,10 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: tbz_wzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.2(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: TBZW $wzr, 0, %bb.2
+ ; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
@@ -103,11 +101,9 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: tbnz_wzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.1(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: TBNZW $wzr, 0, %bb.2
- ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
; CHECK-NEXT: RET undef $lr, implicit $w0
@@ -136,10 +132,10 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: cbz_xzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.2(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: CBZX $xzr, %bb.2
+ ; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
@@ -168,11 +164,9 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: cbnz_xzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.1(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: CBNZX $xzr, %bb.2
- ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
; CHECK-NEXT: RET undef $lr, implicit $w0
@@ -200,10 +194,10 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: tbz_xzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.2(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: TBZX $xzr, 0, %bb.2
+ ; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
@@ -232,11 +226,9 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: tbnz_xzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.1(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: TBNZX $xzr, 0, %bb.2
- ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
; CHECK-NEXT: RET undef $lr, implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/pr164181.ll b/llvm/test/CodeGen/AArch64/pr164181.ll
index 18732ae5ae300..abb090ccbcaed 100644
--- a/llvm/test/CodeGen/AArch64/pr164181.ll
+++ b/llvm/test/CodeGen/AArch64/pr164181.ll
@@ -288,7 +288,6 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
; CHECK-NEXT: // Parent Loop BB0_10 Depth=4
; CHECK-NEXT: // => This Inner Loop Header: Depth=5
-; CHECK-NEXT: cbnz wzr, .LBB0_30
; CHECK-NEXT: // %bb.29: // %if.then222.us
; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
; CHECK-NEXT: adrp x27, :got:var_32
@@ -299,7 +298,7 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: bic w25, w8, w8, asr #31
; CHECK-NEXT: b .LBB0_31
; CHECK-NEXT: .p2align 5, , 16
-; CHECK-NEXT: .LBB0_30: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: // %bb.30:
; CHECK-NEXT: mov w25, wzr
; CHECK-NEXT: .LBB0_31: // %if.end239.us
; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
diff --git a/llvm/test/CodeGen/AArch64/pr166870.ll b/llvm/test/CodeGen/AArch64/pr166870.ll
index d6f99c67a01ff..48967d30485ab 100644
--- a/llvm/test/CodeGen/AArch64/pr166870.ll
+++ b/llvm/test/CodeGen/AArch64/pr166870.ll
@@ -26,7 +26,6 @@ define i32 @widget(i32 %arg, i32 %arg1, i1 %arg2, ptr %arg3, i1 %arg4) #0 nounwi
; CHECK-NEXT: mov x21, x1
; CHECK-NEXT: bl baz
; CHECK-NEXT: mov w0, #0 // =0x0
-; CHECK-NEXT: cbnz wzr, .LBB0_11
; CHECK-NEXT: // %bb.5: // %bb6
; CHECK-NEXT: mov w10, #1 // =0x1
; CHECK-NEXT: cbnz w10, .LBB0_11
diff --git a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
index 6946cc23d867d..72e3a2ef59677 100644
--- a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
+++ b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
@@ -586,13 +586,12 @@ define ptr @tbnz_wzr(i1 %cmp1.not.i, ptr %locflg) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: tbz w0, #0, .LBB20_2
; CHECK-SD-NEXT: // %bb.1:
-; CHECK-SD-NEXT: tbnz wzr, #0, .LBB20_3
; CHECK-SD-NEXT: b .LBB20_4
; CHECK-SD-NEXT: .LBB20_2: // %opnfil.exit.thread
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: tbz w8, #0, .LBB20_4
-; CHECK-SD-NEXT: .LBB20_3: // %if.else25
+; CHECK-SD-NEXT: // %bb.3: // %if.else25
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: .LBB20_4: // %common.ret
; CHECK-SD-NEXT: mov x0, xzr
@@ -656,7 +655,7 @@ define ptr @tbz_wzr(i1 %cmp1.not.i, ptr %locflg) {
; CHECK-SD-NEXT: b .LBB21_4
; CHECK-SD-NEXT: .LBB21_2: // %opnfil.exit.thread
; CHECK-SD-NEXT: str wzr, [x1]
-; CHECK-SD-NEXT: tbz wzr, #0, .LBB21_4
+; CHECK-SD-NEXT: b .LBB21_4
; CHECK-SD-NEXT: .LBB21_3: // %if.else25
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: .LBB21_4: // %common.ret
@@ -716,13 +715,12 @@ define ptr @cbnz_wzr(i1 %cmp1.not.i, ptr %locflg) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: tbz w0, #0, .LBB22_2
; CHECK-SD-NEXT: // %bb.1:
-; CHECK-SD-NEXT: cbnz wzr, .LBB22_3
; CHECK-SD-NEXT: b .LBB22_4
; CHECK-SD-NEXT: .LBB22_2: // %opnfil.exit.thread
; CHECK-SD-NEXT: mov w8, #10 // =0xa
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: cbz w8, .LBB22_4
-; CHECK-SD-NEXT: .LBB22_3: // %if.else25
+; CHECK-SD-NEXT: // %bb.3: // %if.else25
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: .LBB22_4: // %common.ret
; CHECK-SD-NEXT: mov x0, xzr
@@ -784,7 +782,7 @@ define ptr @cbz_wzr(i1 %cmp1.not.i, ptr %locflg) {
; CHECK-SD-NEXT: b .LBB23_4
; CHECK-SD-NEXT: .LBB23_2: // %opnfil.exit.thread
; CHECK-SD-NEXT: str wzr, [x1]
-; CHECK-SD-NEXT: cbz wzr, .LBB23_4
+; CHECK-SD-NEXT: b .LBB23_4
; CHECK-SD-NEXT: .LBB23_3: // %if.else25
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: .LBB23_4: // %common.ret
@@ -848,12 +846,11 @@ define i1 @avifSequenceHeaderParse() {
; CHECK-SD-NEXT: .LBB24_2: // %bb1
; CHECK-SD-NEXT: cbz w8, .LBB24_4
; CHECK-SD-NEXT: // %bb.3:
-; CHECK-SD-NEXT: tbz xzr, #63, .LBB24_1
-; CHECK-SD-NEXT: b .LBB24_5
+; CHECK-SD-NEXT: b .LBB24_1
; CHECK-SD-NEXT: .LBB24_4: // %bb2
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: tbz x8, #63, .LBB24_1
-; CHECK-SD-NEXT: .LBB24_5: // %bb4
+; CHECK-SD-NEXT: // %bb.5: // %bb4
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: mov w0, wzr
; CHECK-SD-NEXT: ret
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
index 97b17d98d3472..f4815dc331056 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
@@ -71,14 +71,16 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: stur xzr, [x29, #-8]
-; CHECK-NEXT: cbz wzr, .LBB0_3
+; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: str w8, [sp, #16]
; CHECK-NEXT: ldur w8, [x29, #-8]
; CHECK-NEXT: cbz w8, .LBB0_4
; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: .cfi_restore_state
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: str w8, [sp, #16]
; CHECK-NEXT: b .LBB0_5
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
index 3d379ea1faf5f..bc3500522672d 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
@@ -12,14 +12,16 @@ define dso_local i32 @check_boundaries() #0 {
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: stur xzr, [x29, #-8]
-; CHECK-NEXT: cbz wzr, .LBB0_3
+; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: str w8, [sp, #16]
; CHECK-NEXT: ldur w8, [x29, #-8]
; CHECK-NEXT: cbz w8, .LBB0_4
; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: .cfi_restore_state
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: str w8, [sp, #16]
; CHECK-NEXT: b .LBB0_5
>From 6af5310b16775580da5aee26849d7be481ebfed3 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 5 Dec 2025 15:52:15 +0000
Subject: [PATCH 2/2] Address comments and cleanup
---
.../AArch64RedundantCondBranchPass.cpp | 36 ++++++++-----------
1 file changed, 15 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp b/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
index 1bd47e24809ed..1b990796ec9da 100644
--- a/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
@@ -1,4 +1,4 @@
-//=- AArch64RedundantCondBranch.cpp - Remove redundant cbz wzr --------------=//
+//=- AArch64RedundantCondBranch.cpp - Remove redundant conditional branches -=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -45,7 +45,8 @@ INITIALIZE_PASS(AArch64RedundantCondBranch, "aarch64-redundantcondbranch",
"AArch64 Redundant Conditional Branch Elimination pass", false,
false)
-static bool optimizeTerminators(MachineBasicBlock *MBB) {
+static bool optimizeTerminators(MachineBasicBlock *MBB,
+ const TargetInstrInfo &TII) {
for (MachineInstr &MI : make_early_inc_range(MBB->terminators())) {
unsigned Opc = MI.getOpcode();
switch (Opc) {
@@ -53,26 +54,20 @@ static bool optimizeTerminators(MachineBasicBlock *MBB) {
case AArch64::CBZX:
case AArch64::TBZW:
case AArch64::TBZX:
- // CBZ XZR -> B
+ // CBZ/TBZ with WZR/XZR -> unconditional B
if (MI.getOperand(0).getReg() == AArch64::WZR ||
MI.getOperand(0).getReg() == AArch64::XZR) {
LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
- MachineBasicBlock *Target =
- MI.getOperand(Opc == AArch64::TBZW || Opc == AArch64::TBZX ? 2 : 1)
- .getMBB();
- MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
SmallVector<MachineBasicBlock *> Succs(MBB->successors());
for (auto *S : Succs)
if (S != Target)
MBB->removeSuccessor(S);
- SmallVector<MachineInstr *> DeadInstrs;
- for (auto It = MI.getIterator(); It != MBB->end(); ++It)
- DeadInstrs.push_back(&*It);
- const MachineFunction *MF = MBB->getParent();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- BuildMI(MBB, MI.getDebugLoc(), TII->get(AArch64::B)).addMBB(Target);
- for (auto It : DeadInstrs)
- It->eraseFromParent();
+ DebugLoc DL = MI.getDebugLoc();
+ while (MBB->rbegin() != &MI)
+ MBB->rbegin()->eraseFromParent();
+ MI.eraseFromParent();
+ BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
return true;
}
break;
@@ -80,14 +75,11 @@ static bool optimizeTerminators(MachineBasicBlock *MBB) {
case AArch64::CBNZX:
case AArch64::TBNZW:
case AArch64::TBNZX:
- // CBNZ XZR -> nop
+ // CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
if (MI.getOperand(0).getReg() == AArch64::WZR ||
MI.getOperand(0).getReg() == AArch64::XZR) {
LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
- MachineBasicBlock *Target =
- MI.getOperand((Opc == AArch64::TBNZW || Opc == AArch64::TBNZX) ? 2
- : 1)
- .getMBB();
+ MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
MI.getParent()->removeSuccessor(Target);
MI.eraseFromParent();
return true;
@@ -102,9 +94,11 @@ bool AArch64RedundantCondBranch::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
bool Changed = false;
for (MachineBasicBlock &MBB : MF)
- Changed |= optimizeTerminators(&MBB);
+ Changed |= optimizeTerminators(&MBB, TII);
return Changed;
}
More information about the llvm-commits
mailing list