[llvm] [AArch64] Optimize CBZ wzr and friends. (PR #161508)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 04:45:49 PDT 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/161508
In certain situations, especially with zero phi operands propagated after tail duplication, we can end up with CBZ/CBNZ/TBZ/TBNZ on a zero register (WZR/XZR). This only happens late in the pipeline. This patch adds a basic simplifyInstruction that folds them away, either by converting them to a direct branch or by removing the instruction entirely.
It needs some fixups, but seems to work.
>From 01056469350de53e093ceb605b7c6605d48c37cc Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 1 Oct 2025 12:44:04 +0100
Subject: [PATCH] [AArch64] Optimize CBZ wzr and friends.
In certain situations, especially with zero phi operands propagated after tail
duplication, we can end up with CBZ/CBNZ/TBZ/TBNZ on a zero register (WZR/XZR).
This only happens late in the pipeline. This patch adds a basic
simplifyInstruction that folds them away, either by converting them to a direct
branch or by removing the instruction entirely.
It needs some fixups, but seems to work.
---
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 5 +-
llvm/lib/CodeGen/MachineCopyPropagation.cpp | 8 +-
llvm/lib/CodeGen/ShrinkWrap.cpp | 2 +
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 49 ++++++++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.h | 3 +
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 3 +-
llvm/lib/Target/RISCV/RISCVInstrInfo.h | 3 +-
llvm/test/CodeGen/AArch64/arm64-rev.ll | 10 +--
.../CodeGen/AArch64/arm64-shrink-wrapping.ll | 80 +++++--------------
.../block-placement-optimize-branches.ll | 34 +++-----
llvm/test/CodeGen/AArch64/cbz_wzr.mir | 32 +++-----
.../AArch64/lr-reserved-for-ra-live-in.ll | 2 -
llvm/test/CodeGen/AArch64/tbz-tbnz.ll | 15 ++--
...ch64_generated_funcs.ll.generated.expected | 21 ++---
...64_generated_funcs.ll.nogenerated.expected | 20 ++---
15 files changed, 136 insertions(+), 151 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 175f205328361..4bc5b50293db1 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -550,7 +550,10 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
/// MachineCopyPropagation, where their mutation of the MI operands may
/// expose opportunities to convert the instruction to a simpler form (e.g.
/// a load of 0).
- virtual bool simplifyInstruction(MachineInstr &MI) const { return false; }
+ virtual bool simplifyInstruction(MachineInstr &MI,
+ bool &AlteredTerminators) const {
+ return false;
+ }
/// A pair composed of a register and a sub-register index.
/// Used to give some type checking when modeling Reg:SubReg.
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index e35983138550f..0a0a7dc0a1ed0 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -928,9 +928,13 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Attempt to canonicalize/optimize the instruction now its arguments have
// been mutated. This may convert MI from a non-copy to a copy instruction.
- if (TII->simplifyInstruction(MI)) {
+ bool AlteredTerminators = false;
+ if (TII->simplifyInstruction(MI, AlteredTerminators)) {
Changed = true;
- LLVM_DEBUG(dbgs() << "MCP: After simplifyInstruction: " << MI);
+ if (AlteredTerminators)
+ break;
+ else
+ LLVM_DEBUG(dbgs() << "MCP: After simplifyInstruction: " << MI);
}
CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr);
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index 826e4126de44c..88441b1f6a7a8 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -618,6 +618,8 @@ bool ShrinkWrapImpl::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
DenseSet<const MachineBasicBlock *> DirtyBBs;
for (MachineBasicBlock &MBB : MF) {
+ if (!MDT->isReachableFromEntry(&MBB))
+ continue;
if (MBB.isEHPad()) {
DirtyBBs.insert(&MBB);
continue;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5a51c812732e6..99a18201283b1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -685,6 +685,55 @@ unsigned AArch64InstrInfo::insertBranch(
return 2;
}
+bool AArch64InstrInfo::simplifyInstruction(MachineInstr &MI,
+ bool &AlteredTerminators) const {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ case AArch64::TBZW:
+ case AArch64::TBZX:
+ // CBZ/TBZ on WZR/XZR always branches: keep Target as the only successor and replace MI and everything after it in the block with B Target.
+ if (MI.getOperand(0).getReg() == AArch64::WZR ||
+ MI.getOperand(0).getReg() == AArch64::XZR) {
+ MachineBasicBlock *Target =
+ MI.getOperand(Opc == AArch64::TBZW || Opc == AArch64::TBZX ? 2 : 1)
+ .getMBB();
+ MachineBasicBlock *MBB = MI.getParent();
+ SmallVector<MachineBasicBlock *> Succs(MBB->successors());
+ for (auto *S : Succs)
+ if (S != Target)
+ MBB->removeSuccessor(S);
+ SmallVector<MachineInstr*> DeadInstrs;
+ for (auto It = MI.getIterator(); It != MBB->end(); ++It)
+ DeadInstrs.push_back(&*It);
+ BuildMI(MBB, MI.getDebugLoc(), get(AArch64::B)).addMBB(Target);
+ for (auto It : DeadInstrs)
+ It->eraseFromParent();
+ AlteredTerminators = true;
+ return true;
+ }
+ break;
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ case AArch64::TBNZW:
+ case AArch64::TBNZX:
+ // CBNZ/TBNZ on WZR/XZR never branches: erase MI and drop Target as a successor. NOTE(review): assumes no later terminator in this block also goes to Target - confirm.
+ if (MI.getOperand(0).getReg() == AArch64::WZR ||
+ MI.getOperand(0).getReg() == AArch64::XZR) {
+ MachineBasicBlock *Target =
+ MI.getOperand(Opc == AArch64::TBNZW || Opc == AArch64::TBNZX ? 2 : 1)
+ .getMBB();
+ MI.getParent()->removeSuccessor(Target);
+ MI.eraseFromParent();
+ AlteredTerminators = true;
+ return true;
+ }
+ break;
+ }
+ return false;
+}
+
// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
while (Register::isVirtualRegister(VReg)) {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 179574a73aa01..c27f0b7bc608f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -401,6 +401,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
+ bool simplifyInstruction(MachineInstr &MI,
+ bool &AlteredTerminators) const override;
+
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1e6b04f8a4281..bd2e446212cb6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -4169,7 +4169,8 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
#undef CASE_VFMA_OPCODE_VV
#undef CASE_VFMA_SPLATS
-bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI) const {
+bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI,
+ bool &AlteredTerminators) const {
switch (MI.getOpcode()) {
default:
break;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 42a0c4c01b472..e2ba31318f131 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -239,7 +239,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
unsigned OpIdx1,
unsigned OpIdx2) const override;
- bool simplifyInstruction(MachineInstr &MI) const override;
+ bool simplifyInstruction(MachineInstr &MI,
+ bool &AlteredTerminators) const override;
MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
LiveIntervals *LIS) const override;
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 84557b441853a..5980e3214d0da 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -530,28 +530,22 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
define void @test_rev16_truncstore() {
; CHECK-SD-LABEL: test_rev16_truncstore:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cbnz wzr, .LBB38_2
; CHECK-SD-NEXT: .LBB38_1: // %cleanup
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-SD-NEXT: ldrh w8, [x8]
; CHECK-SD-NEXT: rev16 w8, w8
; CHECK-SD-NEXT: strh w8, [x8]
-; CHECK-SD-NEXT: cbz wzr, .LBB38_1
-; CHECK-SD-NEXT: .LBB38_2: // %fail
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: b .LBB38_1
;
; CHECK-GI-LABEL: test_rev16_truncstore:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: tbnz wzr, #0, .LBB38_2
; CHECK-GI-NEXT: .LBB38_1: // %cleanup
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrh w8, [x8]
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: lsr w8, w8, #16
; CHECK-GI-NEXT: strh w8, [x8]
-; CHECK-GI-NEXT: tbz wzr, #0, .LBB38_1
-; CHECK-GI-NEXT: .LBB38_2: // %fail
-; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: b .LBB38_1
entry:
br label %body
diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index 5806bcf0dacf1..b837a361bd287 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -735,22 +735,15 @@ define void @infiniteloop() {
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
-; ENABLE-NEXT: cbnz wzr, LBB10_3
-; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub x19, sp, #16
; ENABLE-NEXT: mov sp, x19
; ENABLE-NEXT: mov w20, wzr
-; ENABLE-NEXT: LBB10_2: ; %for.body
+; ENABLE-NEXT: LBB10_1: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: bl _something
; ENABLE-NEXT: add w20, w0, w20
; ENABLE-NEXT: str w20, [x19]
-; ENABLE-NEXT: b LBB10_2
-; ENABLE-NEXT: LBB10_3: ; %if.end
-; ENABLE-NEXT: sub sp, x29, #16
-; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; ENABLE-NEXT: ret
+; ENABLE-NEXT: b LBB10_1
;
; DISABLE-LABEL: infiniteloop:
; DISABLE: ; %bb.0: ; %entry
@@ -762,22 +755,15 @@ define void @infiniteloop() {
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
-; DISABLE-NEXT: cbnz wzr, LBB10_3
-; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: sub x19, sp, #16
; DISABLE-NEXT: mov sp, x19
; DISABLE-NEXT: mov w20, wzr
-; DISABLE-NEXT: LBB10_2: ; %for.body
+; DISABLE-NEXT: LBB10_1: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: bl _something
; DISABLE-NEXT: add w20, w0, w20
; DISABLE-NEXT: str w20, [x19]
-; DISABLE-NEXT: b LBB10_2
-; DISABLE-NEXT: LBB10_3: ; %if.end
-; DISABLE-NEXT: sub sp, x29, #16
-; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; DISABLE-NEXT: ret
+; DISABLE-NEXT: b LBB10_1
entry:
br i1 undef, label %if.then, label %if.end
@@ -808,12 +794,10 @@ define void @infiniteloop2() {
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
-; ENABLE-NEXT: cbnz wzr, LBB11_3
-; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub x8, sp, #16
; ENABLE-NEXT: mov sp, x8
; ENABLE-NEXT: mov w9, wzr
-; ENABLE-NEXT: LBB11_2: ; %for.body
+; ENABLE-NEXT: LBB11_1: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: ; InlineAsm Start
; ENABLE-NEXT: mov x10, #0 ; =0x0
@@ -824,12 +808,7 @@ define void @infiniteloop2() {
; ENABLE-NEXT: ; InlineAsm Start
; ENABLE-NEXT: nop
; ENABLE-NEXT: ; InlineAsm End
-; ENABLE-NEXT: b LBB11_2
-; ENABLE-NEXT: LBB11_3: ; %if.end
-; ENABLE-NEXT: sub sp, x29, #16
-; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; ENABLE-NEXT: ret
+; ENABLE-NEXT: b LBB11_1
;
; DISABLE-LABEL: infiniteloop2:
; DISABLE: ; %bb.0: ; %entry
@@ -841,12 +820,10 @@ define void @infiniteloop2() {
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
-; DISABLE-NEXT: cbnz wzr, LBB11_3
-; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: sub x8, sp, #16
; DISABLE-NEXT: mov sp, x8
; DISABLE-NEXT: mov w9, wzr
-; DISABLE-NEXT: LBB11_2: ; %for.body
+; DISABLE-NEXT: LBB11_1: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: ; InlineAsm Start
; DISABLE-NEXT: mov x10, #0 ; =0x0
@@ -857,12 +834,7 @@ define void @infiniteloop2() {
; DISABLE-NEXT: ; InlineAsm Start
; DISABLE-NEXT: nop
; DISABLE-NEXT: ; InlineAsm End
-; DISABLE-NEXT: b LBB11_2
-; DISABLE-NEXT: LBB11_3: ; %if.end
-; DISABLE-NEXT: sub sp, x29, #16
-; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; DISABLE-NEXT: ret
+; DISABLE-NEXT: b LBB11_1
entry:
br i1 undef, label %if.then, label %if.end
@@ -893,51 +865,43 @@ if.end:
define void @infiniteloop3() {
; ENABLE-LABEL: infiniteloop3:
; ENABLE: ; %bb.0: ; %entry
-; ENABLE-NEXT: cbnz wzr, LBB12_5
-; ENABLE-NEXT: ; %bb.1: ; %loop2a.preheader
; ENABLE-NEXT: mov x8, xzr
; ENABLE-NEXT: mov x9, xzr
; ENABLE-NEXT: mov x11, xzr
-; ENABLE-NEXT: b LBB12_3
-; ENABLE-NEXT: LBB12_2: ; %loop2b
-; ENABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
+; ENABLE-NEXT: b LBB12_2
+; ENABLE-NEXT: LBB12_1: ; %loop2b
+; ENABLE-NEXT: ; in Loop: Header=BB12_2 Depth=1
; ENABLE-NEXT: str x10, [x11]
; ENABLE-NEXT: mov x11, x10
-; ENABLE-NEXT: LBB12_3: ; %loop1
+; ENABLE-NEXT: LBB12_2: ; %loop1
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: mov x10, x9
; ENABLE-NEXT: ldr x9, [x8]
-; ENABLE-NEXT: cbnz x8, LBB12_2
-; ENABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
+; ENABLE-NEXT: cbnz x8, LBB12_1
+; ENABLE-NEXT: ; %bb.3: ; in Loop: Header=BB12_2 Depth=1
; ENABLE-NEXT: mov x8, x10
; ENABLE-NEXT: mov x11, x10
-; ENABLE-NEXT: b LBB12_3
-; ENABLE-NEXT: LBB12_5: ; %end
-; ENABLE-NEXT: ret
+; ENABLE-NEXT: b LBB12_2
;
; DISABLE-LABEL: infiniteloop3:
; DISABLE: ; %bb.0: ; %entry
-; DISABLE-NEXT: cbnz wzr, LBB12_5
-; DISABLE-NEXT: ; %bb.1: ; %loop2a.preheader
; DISABLE-NEXT: mov x8, xzr
; DISABLE-NEXT: mov x9, xzr
; DISABLE-NEXT: mov x11, xzr
-; DISABLE-NEXT: b LBB12_3
-; DISABLE-NEXT: LBB12_2: ; %loop2b
-; DISABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
+; DISABLE-NEXT: b LBB12_2
+; DISABLE-NEXT: LBB12_1: ; %loop2b
+; DISABLE-NEXT: ; in Loop: Header=BB12_2 Depth=1
; DISABLE-NEXT: str x10, [x11]
; DISABLE-NEXT: mov x11, x10
-; DISABLE-NEXT: LBB12_3: ; %loop1
+; DISABLE-NEXT: LBB12_2: ; %loop1
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: mov x10, x9
; DISABLE-NEXT: ldr x9, [x8]
-; DISABLE-NEXT: cbnz x8, LBB12_2
-; DISABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
+; DISABLE-NEXT: cbnz x8, LBB12_1
+; DISABLE-NEXT: ; %bb.3: ; in Loop: Header=BB12_2 Depth=1
; DISABLE-NEXT: mov x8, x10
; DISABLE-NEXT: mov x11, x10
-; DISABLE-NEXT: b LBB12_3
-; DISABLE-NEXT: LBB12_5: ; %end
-; DISABLE-NEXT: ret
+; DISABLE-NEXT: b LBB12_2
entry:
br i1 undef, label %loop2a, label %body
diff --git a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
index 3645718968f9e..6e6fb6f367867 100644
--- a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
+++ b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
@@ -8,21 +8,14 @@
define i8 @foo_optsize(i32 %v4) optsize {
; CHECK-LABEL: foo_optsize:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cbz wzr, .LBB0_2
-; CHECK-NEXT: .LBB0_1:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_2: // %b1
-; CHECK-NEXT: cbnz w0, .LBB0_4
-; CHECK-NEXT: .LBB0_3: // %b2
+; CHECK-NEXT: cbnz w0, .LBB0_2
+; CHECK-NEXT: // %bb.1: // %b2
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_4: // %b1
+; CHECK-NEXT: .LBB0_2: // %b1
; CHECK-NEXT: cmp w0, #1
-; CHECK-NEXT: b.ne .LBB0_1
-; CHECK-NEXT: // %bb.5: // %b3
-; CHECK-NEXT: cbz wzr, .LBB0_1
-; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
entry:
%v2 = icmp eq i32 0, 0
br i1 %v2, label %b1, label %b4
@@ -48,21 +41,14 @@ b4:
define i8 @foo_optspeed(i32 %v4) {
; CHECK-LABEL: foo_optspeed:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cbz wzr, .LBB1_2
-; CHECK-NEXT: .LBB1_1:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_2: // %b1
-; CHECK-NEXT: cbnz w0, .LBB1_4
-; CHECK-NEXT: .LBB1_3: // %b2
+; CHECK-NEXT: cbnz w0, .LBB1_2
+; CHECK-NEXT: // %bb.1: // %b2
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_4: // %b1
+; CHECK-NEXT: .LBB1_2: // %b1
; CHECK-NEXT: cmp w0, #1
-; CHECK-NEXT: b.ne .LBB1_1
-; CHECK-NEXT: // %bb.5: // %b3
-; CHECK-NEXT: cbnz wzr, .LBB1_3
-; CHECK-NEXT: b .LBB1_1
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
entry:
%v2 = icmp eq i32 0, 0
br i1 %v2, label %b1, label %b4
diff --git a/llvm/test/CodeGen/AArch64/cbz_wzr.mir b/llvm/test/CodeGen/AArch64/cbz_wzr.mir
index 7deea56ba23a1..5d841c26014b2 100644
--- a/llvm/test/CodeGen/AArch64/cbz_wzr.mir
+++ b/llvm/test/CodeGen/AArch64/cbz_wzr.mir
@@ -7,10 +7,10 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: cbz_wzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.2(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: CBZW $wzr, %bb.2
+ ; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
@@ -39,11 +39,9 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: cbnz_wzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.1(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: CBNZW $wzr, %bb.2
- ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
; CHECK-NEXT: RET undef $lr, implicit $w0
@@ -71,10 +69,10 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: tbz_wzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.2(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: TBZW $wzr, 0, %bb.2
+ ; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
@@ -103,11 +101,9 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: tbnz_wzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.1(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: TBNZW $wzr, 0, %bb.2
- ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
; CHECK-NEXT: RET undef $lr, implicit $w0
@@ -136,10 +132,10 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: cbz_xzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.2(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: CBZX $xzr, %bb.2
+ ; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
@@ -168,11 +164,9 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: cbnz_xzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.1(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: CBNZX $xzr, %bb.2
- ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
; CHECK-NEXT: RET undef $lr, implicit $w0
@@ -200,10 +194,10 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: tbz_xzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.2(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: TBZX $xzr, 0, %bb.2
+ ; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
@@ -232,11 +226,9 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: tbnz_xzr
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: successors: %bb.1(0x40000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: TBNZX $xzr, 0, %bb.2
- ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: $w0 = MOVZWi 10, 0
; CHECK-NEXT: RET undef $lr, implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll b/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll
index 29427146e8a43..708ba621c26d8 100644
--- a/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll
+++ b/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll
@@ -21,10 +21,8 @@ define i32 @check_lr_liveness(ptr %arg) #1 {
; CHECK-NEXT: B %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.bb:
- ; CHECK-NEXT: successors: %bb.3(0x2aaaaaab), %bb.2(0x55555555)
; CHECK-NEXT: liveins: $w0, $lr
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: CBNZW $wzr, %bb.3
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.bb1:
diff --git a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
index 6946cc23d867d..72e3a2ef59677 100644
--- a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
+++ b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
@@ -586,13 +586,12 @@ define ptr @tbnz_wzr(i1 %cmp1.not.i, ptr %locflg) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: tbz w0, #0, .LBB20_2
; CHECK-SD-NEXT: // %bb.1:
-; CHECK-SD-NEXT: tbnz wzr, #0, .LBB20_3
; CHECK-SD-NEXT: b .LBB20_4
; CHECK-SD-NEXT: .LBB20_2: // %opnfil.exit.thread
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: tbz w8, #0, .LBB20_4
-; CHECK-SD-NEXT: .LBB20_3: // %if.else25
+; CHECK-SD-NEXT: // %bb.3: // %if.else25
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: .LBB20_4: // %common.ret
; CHECK-SD-NEXT: mov x0, xzr
@@ -656,7 +655,7 @@ define ptr @tbz_wzr(i1 %cmp1.not.i, ptr %locflg) {
; CHECK-SD-NEXT: b .LBB21_4
; CHECK-SD-NEXT: .LBB21_2: // %opnfil.exit.thread
; CHECK-SD-NEXT: str wzr, [x1]
-; CHECK-SD-NEXT: tbz wzr, #0, .LBB21_4
+; CHECK-SD-NEXT: b .LBB21_4
; CHECK-SD-NEXT: .LBB21_3: // %if.else25
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: .LBB21_4: // %common.ret
@@ -716,13 +715,12 @@ define ptr @cbnz_wzr(i1 %cmp1.not.i, ptr %locflg) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: tbz w0, #0, .LBB22_2
; CHECK-SD-NEXT: // %bb.1:
-; CHECK-SD-NEXT: cbnz wzr, .LBB22_3
; CHECK-SD-NEXT: b .LBB22_4
; CHECK-SD-NEXT: .LBB22_2: // %opnfil.exit.thread
; CHECK-SD-NEXT: mov w8, #10 // =0xa
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: cbz w8, .LBB22_4
-; CHECK-SD-NEXT: .LBB22_3: // %if.else25
+; CHECK-SD-NEXT: // %bb.3: // %if.else25
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: .LBB22_4: // %common.ret
; CHECK-SD-NEXT: mov x0, xzr
@@ -784,7 +782,7 @@ define ptr @cbz_wzr(i1 %cmp1.not.i, ptr %locflg) {
; CHECK-SD-NEXT: b .LBB23_4
; CHECK-SD-NEXT: .LBB23_2: // %opnfil.exit.thread
; CHECK-SD-NEXT: str wzr, [x1]
-; CHECK-SD-NEXT: cbz wzr, .LBB23_4
+; CHECK-SD-NEXT: b .LBB23_4
; CHECK-SD-NEXT: .LBB23_3: // %if.else25
; CHECK-SD-NEXT: str wzr, [x1]
; CHECK-SD-NEXT: .LBB23_4: // %common.ret
@@ -848,12 +846,11 @@ define i1 @avifSequenceHeaderParse() {
; CHECK-SD-NEXT: .LBB24_2: // %bb1
; CHECK-SD-NEXT: cbz w8, .LBB24_4
; CHECK-SD-NEXT: // %bb.3:
-; CHECK-SD-NEXT: tbz xzr, #63, .LBB24_1
-; CHECK-SD-NEXT: b .LBB24_5
+; CHECK-SD-NEXT: b .LBB24_1
; CHECK-SD-NEXT: .LBB24_4: // %bb2
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: tbz x8, #63, .LBB24_1
-; CHECK-SD-NEXT: .LBB24_5: // %bb4
+; CHECK-SD-NEXT: // %bb.5: // %bb4
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: mov w0, wzr
; CHECK-SD-NEXT: ret
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
index 97b17d98d3472..215b11a746759 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
@@ -72,24 +72,20 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: stur xzr, [x29, #-8]
-; CHECK-NEXT: cbz wzr, .LBB0_3
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: str w8, [sp, #16]
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
; CHECK-NEXT: ldur w8, [x29, #-8]
-; CHECK-NEXT: cbz w8, .LBB0_4
-; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: cbz w8, .LBB0_2
+; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: str w8, [sp, #16]
-; CHECK-NEXT: b .LBB0_5
-; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: bl OUTLINED_FUNCTION_0
-; CHECK-NEXT: ldur w8, [x29, #-8]
-; CHECK-NEXT: cbnz w8, .LBB0_2
-; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
-; CHECK-NEXT: .LBB0_5:
+; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: .cfi_def_cfa wsp, 48
; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
@@ -132,7 +128,6 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
;
; CHECK-LABEL: OUTLINED_FUNCTION_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: stp w9, w8, [x29, #-12]
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: mov w8, #4 // =0x4
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
index 3d379ea1faf5f..bf7cf2b54983b 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
@@ -13,24 +13,20 @@ define dso_local i32 @check_boundaries() #0 {
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: stur xzr, [x29, #-8]
-; CHECK-NEXT: cbz wzr, .LBB0_3
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: str w8, [sp, #16]
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
; CHECK-NEXT: ldur w8, [x29, #-8]
-; CHECK-NEXT: cbz w8, .LBB0_4
-; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: cbz w8, .LBB0_2
+; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: str w8, [sp, #16]
-; CHECK-NEXT: b .LBB0_5
-; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: bl OUTLINED_FUNCTION_0
-; CHECK-NEXT: ldur w8, [x29, #-8]
-; CHECK-NEXT: cbnz w8, .LBB0_2
-; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
-; CHECK-NEXT: .LBB0_5:
+; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: .cfi_def_cfa wsp, 48
; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
More information about the llvm-commits
mailing list