[llvm] [AArch64] Run optimizeTerminators earlier too. (PR #170907)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 00:55:39 PST 2025
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/170907
>From 816ec851214eede1fbb37dc166c871f87e4d9e2e Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 8 Dec 2025 08:55:25 +0000
Subject: [PATCH] [AArch64] Run optimizeTerminators earlier too.
Running optimizeTerminators prior to other optimizations like branch layout can
lead to more folding and better codegen, but is not on its own able to capture
all cases. There is benefit to running it in both places. This adds the
existing code from #161508 into the AArch64RedundantCopyElimination pass, which
sounds like a sensible enough place for it.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 47 ++++++++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.h | 2 +
.../AArch64RedundantCondBranchPass.cpp | 46 +-----------
.../AArch64RedundantCopyElimination.cpp | 6 +-
.../CodeGen/AArch64/arm64-shrink-wrapping.ll | 74 ++++++-------------
.../block-placement-optimize-branches.ll | 32 +++-----
.../AArch64/lr-reserved-for-ra-live-in.ll | 2 -
llvm/test/CodeGen/AArch64/pr164181.ll | 66 ++++++++---------
llvm/test/CodeGen/AArch64/pr166870.ll | 21 +++---
...ch64_generated_funcs.ll.generated.expected | 23 ++----
...64_generated_funcs.ll.nogenerated.expected | 22 ++----
11 files changed, 142 insertions(+), 199 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 904577b8233d5..644089715736c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -708,6 +708,53 @@ unsigned AArch64InstrInfo::insertBranch(
return 2;
}
+bool llvm::optimizeTerminators(MachineBasicBlock *MBB,
+ const TargetInstrInfo &TII) {
+ for (MachineInstr &MI : MBB->terminators()) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ case AArch64::TBZW:
+ case AArch64::TBZX:
+ // CBZ/TBZ with WZR/XZR -> unconditional B
+ if (MI.getOperand(0).getReg() == AArch64::WZR ||
+ MI.getOperand(0).getReg() == AArch64::XZR) {
+ DEBUG_WITH_TYPE("optimizeTerminators",
+ dbgs() << "Removing always taken branch: " << MI);
+ MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
+ SmallVector<MachineBasicBlock *> Succs(MBB->successors());
+ for (auto *S : Succs)
+ if (S != Target)
+ MBB->removeSuccessor(S);
+ DebugLoc DL = MI.getDebugLoc();
+ while (MBB->rbegin() != &MI)
+ MBB->rbegin()->eraseFromParent();
+ MI.eraseFromParent();
+ BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
+ return true;
+ }
+ break;
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ case AArch64::TBNZW:
+ case AArch64::TBNZX:
+ // CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
+ if (MI.getOperand(0).getReg() == AArch64::WZR ||
+ MI.getOperand(0).getReg() == AArch64::XZR) {
+ DEBUG_WITH_TYPE("optimizeTerminators",
+ dbgs() << "Removing never taken branch: " << MI);
+ MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
+ MI.getParent()->removeSuccessor(Target);
+ MI.eraseFromParent();
+ return true;
+ }
+ break;
+ }
+ }
+ return false;
+}
+
// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
while (Register::isVirtualRegister(VReg)) {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 2de2e0d73901f..d237721450644 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -705,6 +705,8 @@ int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset,
unsigned *OutUnscaledOp = nullptr,
int64_t *EmittableOffset = nullptr);
+bool optimizeTerminators(MachineBasicBlock *MBB, const TargetInstrInfo &TII);
+
static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }
static inline bool isCondBranchOpcode(int Opc) {
diff --git a/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp b/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
index 1b990796ec9da..1a5a9f0a6018b 100644
--- a/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -45,51 +46,6 @@ INITIALIZE_PASS(AArch64RedundantCondBranch, "aarch64-redundantcondbranch",
"AArch64 Redundant Conditional Branch Elimination pass", false,
false)
-static bool optimizeTerminators(MachineBasicBlock *MBB,
- const TargetInstrInfo &TII) {
- for (MachineInstr &MI : make_early_inc_range(MBB->terminators())) {
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
- case AArch64::CBZW:
- case AArch64::CBZX:
- case AArch64::TBZW:
- case AArch64::TBZX:
- // CBZ/TBZ with WZR/XZR -> unconditional B
- if (MI.getOperand(0).getReg() == AArch64::WZR ||
- MI.getOperand(0).getReg() == AArch64::XZR) {
- LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
- MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
- SmallVector<MachineBasicBlock *> Succs(MBB->successors());
- for (auto *S : Succs)
- if (S != Target)
- MBB->removeSuccessor(S);
- DebugLoc DL = MI.getDebugLoc();
- while (MBB->rbegin() != &MI)
- MBB->rbegin()->eraseFromParent();
- MI.eraseFromParent();
- BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
- return true;
- }
- break;
- case AArch64::CBNZW:
- case AArch64::CBNZX:
- case AArch64::TBNZW:
- case AArch64::TBNZX:
- // CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
- if (MI.getOperand(0).getReg() == AArch64::WZR ||
- MI.getOperand(0).getReg() == AArch64::XZR) {
- LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
- MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
- MI.getParent()->removeSuccessor(Target);
- MI.eraseFromParent();
- return true;
- }
- break;
- }
- }
- return false;
-}
-
bool AArch64RedundantCondBranch::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index 84015e5061768..9dc721eb7315d 100644
--- a/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -50,6 +50,7 @@
// to use WZR/XZR directly in some cases.
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "AArch64InstrInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -475,6 +476,7 @@ bool AArch64RedundantCopyElimination::runOnMachineFunction(
return false;
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Resize the clobbered and used register unit trackers. We do this once per
// function.
@@ -484,8 +486,10 @@ bool AArch64RedundantCopyElimination::runOnMachineFunction(
OptBBUsedRegs.init(*TRI);
bool Changed = false;
- for (MachineBasicBlock &MBB : MF)
+ for (MachineBasicBlock &MBB : MF) {
+ Changed |= optimizeTerminators(&MBB, TII);
Changed |= optimizeBlock(&MBB);
+ }
return Changed;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index 724c8b3fc9170..b837a361bd287 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -735,21 +735,15 @@ define void @infiniteloop() {
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
-; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub x19, sp, #16
; ENABLE-NEXT: mov sp, x19
; ENABLE-NEXT: mov w20, wzr
-; ENABLE-NEXT: LBB10_2: ; %for.body
+; ENABLE-NEXT: LBB10_1: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: bl _something
; ENABLE-NEXT: add w20, w0, w20
; ENABLE-NEXT: str w20, [x19]
-; ENABLE-NEXT: b LBB10_2
-; ENABLE-NEXT: ; %bb.3: ; %if.end
-; ENABLE-NEXT: sub sp, x29, #16
-; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; ENABLE-NEXT: ret
+; ENABLE-NEXT: b LBB10_1
;
; DISABLE-LABEL: infiniteloop:
; DISABLE: ; %bb.0: ; %entry
@@ -761,21 +755,15 @@ define void @infiniteloop() {
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
-; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: sub x19, sp, #16
; DISABLE-NEXT: mov sp, x19
; DISABLE-NEXT: mov w20, wzr
-; DISABLE-NEXT: LBB10_2: ; %for.body
+; DISABLE-NEXT: LBB10_1: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: bl _something
; DISABLE-NEXT: add w20, w0, w20
; DISABLE-NEXT: str w20, [x19]
-; DISABLE-NEXT: b LBB10_2
-; DISABLE-NEXT: ; %bb.3: ; %if.end
-; DISABLE-NEXT: sub sp, x29, #16
-; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; DISABLE-NEXT: ret
+; DISABLE-NEXT: b LBB10_1
entry:
br i1 undef, label %if.then, label %if.end
@@ -806,11 +794,10 @@ define void @infiniteloop2() {
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
-; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub x8, sp, #16
; ENABLE-NEXT: mov sp, x8
; ENABLE-NEXT: mov w9, wzr
-; ENABLE-NEXT: LBB11_2: ; %for.body
+; ENABLE-NEXT: LBB11_1: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: ; InlineAsm Start
; ENABLE-NEXT: mov x10, #0 ; =0x0
@@ -821,12 +808,7 @@ define void @infiniteloop2() {
; ENABLE-NEXT: ; InlineAsm Start
; ENABLE-NEXT: nop
; ENABLE-NEXT: ; InlineAsm End
-; ENABLE-NEXT: b LBB11_2
-; ENABLE-NEXT: ; %bb.3: ; %if.end
-; ENABLE-NEXT: sub sp, x29, #16
-; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; ENABLE-NEXT: ret
+; ENABLE-NEXT: b LBB11_1
;
; DISABLE-LABEL: infiniteloop2:
; DISABLE: ; %bb.0: ; %entry
@@ -838,11 +820,10 @@ define void @infiniteloop2() {
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
-; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: sub x8, sp, #16
; DISABLE-NEXT: mov sp, x8
; DISABLE-NEXT: mov w9, wzr
-; DISABLE-NEXT: LBB11_2: ; %for.body
+; DISABLE-NEXT: LBB11_1: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: ; InlineAsm Start
; DISABLE-NEXT: mov x10, #0 ; =0x0
@@ -853,12 +834,7 @@ define void @infiniteloop2() {
; DISABLE-NEXT: ; InlineAsm Start
; DISABLE-NEXT: nop
; DISABLE-NEXT: ; InlineAsm End
-; DISABLE-NEXT: b LBB11_2
-; DISABLE-NEXT: ; %bb.3: ; %if.end
-; DISABLE-NEXT: sub sp, x29, #16
-; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; DISABLE-NEXT: ret
+; DISABLE-NEXT: b LBB11_1
entry:
br i1 undef, label %if.then, label %if.end
@@ -889,49 +865,43 @@ if.end:
define void @infiniteloop3() {
; ENABLE-LABEL: infiniteloop3:
; ENABLE: ; %bb.0: ; %entry
-; ENABLE-NEXT: ; %bb.1: ; %loop2a.preheader
; ENABLE-NEXT: mov x8, xzr
; ENABLE-NEXT: mov x9, xzr
; ENABLE-NEXT: mov x11, xzr
-; ENABLE-NEXT: b LBB12_3
-; ENABLE-NEXT: LBB12_2: ; %loop2b
-; ENABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
+; ENABLE-NEXT: b LBB12_2
+; ENABLE-NEXT: LBB12_1: ; %loop2b
+; ENABLE-NEXT: ; in Loop: Header=BB12_2 Depth=1
; ENABLE-NEXT: str x10, [x11]
; ENABLE-NEXT: mov x11, x10
-; ENABLE-NEXT: LBB12_3: ; %loop1
+; ENABLE-NEXT: LBB12_2: ; %loop1
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: mov x10, x9
; ENABLE-NEXT: ldr x9, [x8]
-; ENABLE-NEXT: cbnz x8, LBB12_2
-; ENABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
+; ENABLE-NEXT: cbnz x8, LBB12_1
+; ENABLE-NEXT: ; %bb.3: ; in Loop: Header=BB12_2 Depth=1
; ENABLE-NEXT: mov x8, x10
; ENABLE-NEXT: mov x11, x10
-; ENABLE-NEXT: b LBB12_3
-; ENABLE-NEXT: ; %bb.5: ; %end
-; ENABLE-NEXT: ret
+; ENABLE-NEXT: b LBB12_2
;
; DISABLE-LABEL: infiniteloop3:
; DISABLE: ; %bb.0: ; %entry
-; DISABLE-NEXT: ; %bb.1: ; %loop2a.preheader
; DISABLE-NEXT: mov x8, xzr
; DISABLE-NEXT: mov x9, xzr
; DISABLE-NEXT: mov x11, xzr
-; DISABLE-NEXT: b LBB12_3
-; DISABLE-NEXT: LBB12_2: ; %loop2b
-; DISABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
+; DISABLE-NEXT: b LBB12_2
+; DISABLE-NEXT: LBB12_1: ; %loop2b
+; DISABLE-NEXT: ; in Loop: Header=BB12_2 Depth=1
; DISABLE-NEXT: str x10, [x11]
; DISABLE-NEXT: mov x11, x10
-; DISABLE-NEXT: LBB12_3: ; %loop1
+; DISABLE-NEXT: LBB12_2: ; %loop1
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: mov x10, x9
; DISABLE-NEXT: ldr x9, [x8]
-; DISABLE-NEXT: cbnz x8, LBB12_2
-; DISABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
+; DISABLE-NEXT: cbnz x8, LBB12_1
+; DISABLE-NEXT: ; %bb.3: ; in Loop: Header=BB12_2 Depth=1
; DISABLE-NEXT: mov x8, x10
; DISABLE-NEXT: mov x11, x10
-; DISABLE-NEXT: b LBB12_3
-; DISABLE-NEXT: ; %bb.5: ; %end
-; DISABLE-NEXT: ret
+; DISABLE-NEXT: b LBB12_2
entry:
br i1 undef, label %loop2a, label %body
diff --git a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
index 7c3a567d1b336..6e6fb6f367867 100644
--- a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
+++ b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
@@ -8,20 +8,14 @@
define i8 @foo_optsize(i32 %v4) optsize {
; CHECK-LABEL: foo_optsize:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: b .LBB0_2
-; CHECK-NEXT: .LBB0_1:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_2: // %b1
-; CHECK-NEXT: cbnz w0, .LBB0_4
-; CHECK-NEXT: // %bb.3: // %b2
+; CHECK-NEXT: cbnz w0, .LBB0_2
+; CHECK-NEXT: // %bb.1: // %b2
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_4: // %b1
+; CHECK-NEXT: .LBB0_2: // %b1
; CHECK-NEXT: cmp w0, #1
-; CHECK-NEXT: b.ne .LBB0_1
-; CHECK-NEXT: // %bb.5: // %b3
-; CHECK-NEXT: b .LBB0_1
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
entry:
%v2 = icmp eq i32 0, 0
br i1 %v2, label %b1, label %b4
@@ -47,20 +41,14 @@ b4:
define i8 @foo_optspeed(i32 %v4) {
; CHECK-LABEL: foo_optspeed:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: b .LBB1_2
-; CHECK-NEXT: .LBB1_1:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_2: // %b1
-; CHECK-NEXT: cbnz w0, .LBB1_4
-; CHECK-NEXT: // %bb.3: // %b2
+; CHECK-NEXT: cbnz w0, .LBB1_2
+; CHECK-NEXT: // %bb.1: // %b2
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_4: // %b1
+; CHECK-NEXT: .LBB1_2: // %b1
; CHECK-NEXT: cmp w0, #1
-; CHECK-NEXT: b.ne .LBB1_1
-; CHECK-NEXT: // %bb.5: // %b3
-; CHECK-NEXT: b .LBB1_1
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
entry:
%v2 = icmp eq i32 0, 0
br i1 %v2, label %b1, label %b4
diff --git a/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll b/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll
index 29427146e8a43..708ba621c26d8 100644
--- a/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll
+++ b/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll
@@ -21,10 +21,8 @@ define i32 @check_lr_liveness(ptr %arg) #1 {
; CHECK-NEXT: B %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.bb:
- ; CHECK-NEXT: successors: %bb.3(0x2aaaaaab), %bb.2(0x55555555)
; CHECK-NEXT: liveins: $w0, $lr
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: CBNZW $wzr, %bb.3
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.bb1:
diff --git a/llvm/test/CodeGen/AArch64/pr164181.ll b/llvm/test/CodeGen/AArch64/pr164181.ll
index abb090ccbcaed..72b2a77e51c06 100644
--- a/llvm/test/CodeGen/AArch64/pr164181.ll
+++ b/llvm/test/CodeGen/AArch64/pr164181.ll
@@ -29,11 +29,11 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: str w4, [sp, #72] // 4-byte Spill
; CHECK-NEXT: str w3, [sp, #112] // 4-byte Spill
; CHECK-NEXT: str w5, [sp, #36] // 4-byte Spill
-; CHECK-NEXT: tbz w5, #0, .LBB0_43
+; CHECK-NEXT: tbz w5, #0, .LBB0_40
; CHECK-NEXT: // %bb.1: // %for.body41.lr.ph
; CHECK-NEXT: ldr x4, [sp, #312]
; CHECK-NEXT: ldr x14, [sp, #280]
-; CHECK-NEXT: tbz w0, #0, .LBB0_42
+; CHECK-NEXT: tbz w0, #0, .LBB0_39
; CHECK-NEXT: // %bb.2: // %for.body41.us.preheader
; CHECK-NEXT: ldrb w8, [sp, #368]
; CHECK-NEXT: ldrb w12, [sp, #256]
@@ -92,7 +92,7 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: // Child Loop BB0_10 Depth 4
; CHECK-NEXT: // Child Loop BB0_11 Depth 5
; CHECK-NEXT: // Child Loop BB0_28 Depth 5
-; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: // Child Loop BB0_36 Depth 5
; CHECK-NEXT: ldr w8, [sp, #20] // 4-byte Reload
; CHECK-NEXT: mov x12, x24
; CHECK-NEXT: str x24, [sp, #48] // 8-byte Spill
@@ -117,7 +117,7 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: // Child Loop BB0_10 Depth 4
; CHECK-NEXT: // Child Loop BB0_11 Depth 5
; CHECK-NEXT: // Child Loop BB0_28 Depth 5
-; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: // Child Loop BB0_36 Depth 5
; CHECK-NEXT: str x12, [sp, #40] // 8-byte Spill
; CHECK-NEXT: cmn x24, #30
; CHECK-NEXT: mov x12, #-30 // =0xffffffffffffffe2
@@ -142,7 +142,7 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: // Child Loop BB0_10 Depth 4
; CHECK-NEXT: // Child Loop BB0_11 Depth 5
; CHECK-NEXT: // Child Loop BB0_28 Depth 5
-; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: // Child Loop BB0_36 Depth 5
; CHECK-NEXT: ldr x8, [sp, #64] // 8-byte Reload
; CHECK-NEXT: mov w14, #1152 // =0x480
; CHECK-NEXT: mov w24, #1 // =0x1
@@ -176,7 +176,7 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: // => This Loop Header: Depth=4
; CHECK-NEXT: // Child Loop BB0_11 Depth 5
; CHECK-NEXT: // Child Loop BB0_28 Depth 5
-; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: // Child Loop BB0_36 Depth 5
; CHECK-NEXT: ldr w8, [sp, #116] // 4-byte Reload
; CHECK-NEXT: and w8, w8, w8, asr #31
; CHECK-NEXT: str w8, [sp, #128] // 4-byte Spill
@@ -281,31 +281,23 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: mov x24, xzr
; CHECK-NEXT: mul w12, w12, w22
; CHECK-NEXT: mov x22, x5
-; CHECK-NEXT: tbz w0, #0, .LBB0_36
-; CHECK-NEXT: .LBB0_28: // %for.body194.us
+; CHECK-NEXT: tbz w0, #0, .LBB0_33
+; CHECK-NEXT: .LBB0_28: // %if.then222.us
; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
; CHECK-NEXT: // Parent Loop BB0_10 Depth=4
; CHECK-NEXT: // => This Inner Loop Header: Depth=5
-; CHECK-NEXT: // %bb.29: // %if.then222.us
-; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
; CHECK-NEXT: adrp x27, :got:var_32
; CHECK-NEXT: ldur w8, [x19, #-12]
; CHECK-NEXT: ldr x27, [x27, :got_lo12:var_32]
; CHECK-NEXT: strh w8, [x27]
; CHECK-NEXT: sxtb w8, w25
-; CHECK-NEXT: bic w25, w8, w8, asr #31
-; CHECK-NEXT: b .LBB0_31
-; CHECK-NEXT: .p2align 5, , 16
-; CHECK-NEXT: // %bb.30:
-; CHECK-NEXT: mov w25, wzr
-; CHECK-NEXT: .LBB0_31: // %if.end239.us
-; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
; CHECK-NEXT: strb w3, [x16]
+; CHECK-NEXT: bic w25, w8, w8, asr #31
; CHECK-NEXT: tst w13, #0xff
-; CHECK-NEXT: b.eq .LBB0_33
-; CHECK-NEXT: // %bb.32: // %if.then254.us
+; CHECK-NEXT: b.eq .LBB0_30
+; CHECK-NEXT: // %bb.29: // %if.then254.us
; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
; CHECK-NEXT: ldrh w8, [x26, x14, lsl #1]
; CHECK-NEXT: adrp x27, :got:var_35
@@ -314,7 +306,7 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: csel x8, xzr, x7, eq
; CHECK-NEXT: str x8, [x27]
; CHECK-NEXT: strh w1, [x17]
-; CHECK-NEXT: .LBB0_33: // %if.end282.us
+; CHECK-NEXT: .LBB0_30: // %if.end282.us
; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
; CHECK-NEXT: orr x27, x24, x4
; CHECK-NEXT: adrp x8, :got:var_39
@@ -325,14 +317,14 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: str x8, [x18]
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: cbnz x2, .LBB0_27
-; CHECK-NEXT: // %bb.34: // %if.then327.us
+; CHECK-NEXT: // %bb.31: // %if.then327.us
; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
; CHECK-NEXT: cbz w8, .LBB0_25
-; CHECK-NEXT: // %bb.35: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: // %bb.32: // in Loop: Header=BB0_28 Depth=5
; CHECK-NEXT: mov w4, wzr
; CHECK-NEXT: b .LBB0_26
; CHECK-NEXT: .p2align 5, , 16
-; CHECK-NEXT: .LBB0_36: // %for.cond376.preheader.us
+; CHECK-NEXT: .LBB0_33: // %for.cond376.preheader.us
; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
; CHECK-NEXT: mov w3, #1152 // =0x480
; CHECK-NEXT: mov x22, xzr
@@ -343,24 +335,24 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: madd x14, x14, x3, x11
; CHECK-NEXT: mov w28, w30
; CHECK-NEXT: mov w3, #-7680 // =0xffffe200
-; CHECK-NEXT: b .LBB0_39
+; CHECK-NEXT: b .LBB0_36
; CHECK-NEXT: .p2align 5, , 16
-; CHECK-NEXT: .LBB0_37: // %if.then466.us
-; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: .LBB0_34: // %if.then466.us
+; CHECK-NEXT: // in Loop: Header=BB0_36 Depth=5
; CHECK-NEXT: ldr x28, [sp, #152] // 8-byte Reload
; CHECK-NEXT: ldr x3, [sp, #136] // 8-byte Reload
; CHECK-NEXT: sxtb w4, w4
; CHECK-NEXT: bic w4, w4, w4, asr #31
; CHECK-NEXT: str x3, [x28]
; CHECK-NEXT: mov w3, #-7680 // =0xffffe200
-; CHECK-NEXT: .LBB0_38: // %for.inc505.us
-; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: .LBB0_35: // %for.inc505.us
+; CHECK-NEXT: // in Loop: Header=BB0_36 Depth=5
; CHECK-NEXT: add x22, x22, #1
; CHECK-NEXT: add x27, x27, #1
; CHECK-NEXT: mov w28, wzr
; CHECK-NEXT: cmp x27, #0
; CHECK-NEXT: b.hs .LBB0_9
-; CHECK-NEXT: .LBB0_39: // %for.body380.us
+; CHECK-NEXT: .LBB0_36: // %for.body380.us
; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
@@ -372,18 +364,18 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: strh w28, [x11]
; CHECK-NEXT: csel w28, w21, w3, ne
; CHECK-NEXT: str w28, [x20]
-; CHECK-NEXT: cbz x15, .LBB0_38
-; CHECK-NEXT: // %bb.40: // %if.then436.us
-; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: cbz x15, .LBB0_35
+; CHECK-NEXT: // %bb.37: // %if.then436.us
+; CHECK-NEXT: // in Loop: Header=BB0_36 Depth=5
; CHECK-NEXT: ldrh w28, [x14]
-; CHECK-NEXT: cbnz w28, .LBB0_37
-; CHECK-NEXT: // %bb.41: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: cbnz w28, .LBB0_34
+; CHECK-NEXT: // %bb.38: // in Loop: Header=BB0_36 Depth=5
; CHECK-NEXT: mov w4, wzr
-; CHECK-NEXT: b .LBB0_38
-; CHECK-NEXT: .LBB0_42: // %for.body41
+; CHECK-NEXT: b .LBB0_35
+; CHECK-NEXT: .LBB0_39: // %for.body41
; CHECK-NEXT: strb wzr, [x4]
; CHECK-NEXT: strb wzr, [x14]
-; CHECK-NEXT: .LBB0_43: // %for.cond563.preheader
+; CHECK-NEXT: .LBB0_40: // %for.cond563.preheader
; CHECK-NEXT: ldp x20, x19, [sp, #224] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #208] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #192] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/pr166870.ll b/llvm/test/CodeGen/AArch64/pr166870.ll
index 48967d30485ab..6f54b0465fbfd 100644
--- a/llvm/test/CodeGen/AArch64/pr166870.ll
+++ b/llvm/test/CodeGen/AArch64/pr166870.ll
@@ -26,12 +26,11 @@ define i32 @widget(i32 %arg, i32 %arg1, i1 %arg2, ptr %arg3, i1 %arg4) #0 nounwi
; CHECK-NEXT: mov x21, x1
; CHECK-NEXT: bl baz
; CHECK-NEXT: mov w0, #0 // =0x0
-; CHECK-NEXT: // %bb.5: // %bb6
; CHECK-NEXT: mov w10, #1 // =0x1
-; CHECK-NEXT: cbnz w10, .LBB0_11
-; CHECK-NEXT: // %bb.6: // %bb7
; CHECK-NEXT: cbnz w10, .LBB0_10
-; CHECK-NEXT: // %bb.7: // %bb8
+; CHECK-NEXT: // %bb.5: // %bb7
+; CHECK-NEXT: cbnz w10, .LBB0_9
+; CHECK-NEXT: // %bb.6: // %bb8
; CHECK-NEXT: mov x8, x21
; CHECK-NEXT: mov x9, x20
; CHECK-NEXT: mov w20, #0 // =0x0
@@ -39,17 +38,17 @@ define i32 @widget(i32 %arg, i32 %arg1, i1 %arg2, ptr %arg3, i1 %arg4) #0 nounwi
; CHECK-NEXT: mov x21, x9
; CHECK-NEXT: mov w8, w8
; CHECK-NEXT: mov x22, x8
-; CHECK-NEXT: .LBB0_8: // %bb10
+; CHECK-NEXT: .LBB0_7: // %bb10
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: strb w20, [x19]
-; CHECK-NEXT: cbnz x21, .LBB0_8
-; CHECK-NEXT: // %bb.9: // %bb12
-; CHECK-NEXT: // in Loop: Header=BB0_8 Depth=1
+; CHECK-NEXT: cbnz x21, .LBB0_7
+; CHECK-NEXT: // %bb.8: // %bb12
+; CHECK-NEXT: // in Loop: Header=BB0_7 Depth=1
; CHECK-NEXT: bl snork
-; CHECK-NEXT: cbnz x22, .LBB0_8
-; CHECK-NEXT: .LBB0_10:
+; CHECK-NEXT: cbnz x22, .LBB0_7
+; CHECK-NEXT: .LBB0_9:
; CHECK-NEXT: mov w0, #0 // =0x0
-; CHECK-NEXT: .LBB0_11:
+; CHECK-NEXT: .LBB0_10:
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
index f4815dc331056..215b11a746759 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
@@ -71,27 +71,21 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: stur xzr, [x29, #-8]
-; CHECK-NEXT: b .LBB0_3
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: str w8, [sp, #16]
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
; CHECK-NEXT: ldur w8, [x29, #-8]
-; CHECK-NEXT: cbz w8, .LBB0_4
-; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: cbz w8, .LBB0_2
+; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: str w8, [sp, #16]
-; CHECK-NEXT: b .LBB0_5
-; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: bl OUTLINED_FUNCTION_0
-; CHECK-NEXT: ldur w8, [x29, #-8]
-; CHECK-NEXT: cbnz w8, .LBB0_2
-; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
-; CHECK-NEXT: .LBB0_5:
+; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: .cfi_def_cfa wsp, 48
; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
@@ -134,7 +128,6 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
;
; CHECK-LABEL: OUTLINED_FUNCTION_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: stp w9, w8, [x29, #-12]
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: mov w8, #4 // =0x4
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
index bc3500522672d..bf7cf2b54983b 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
@@ -12,27 +12,21 @@ define dso_local i32 @check_boundaries() #0 {
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: stur xzr, [x29, #-8]
-; CHECK-NEXT: b .LBB0_3
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: str w8, [sp, #16]
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
; CHECK-NEXT: ldur w8, [x29, #-8]
-; CHECK-NEXT: cbz w8, .LBB0_4
-; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: cbz w8, .LBB0_2
+; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: str w8, [sp, #16]
-; CHECK-NEXT: b .LBB0_5
-; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: bl OUTLINED_FUNCTION_0
-; CHECK-NEXT: ldur w8, [x29, #-8]
-; CHECK-NEXT: cbnz w8, .LBB0_2
-; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
-; CHECK-NEXT: .LBB0_5:
+; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: .cfi_def_cfa wsp, 48
; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
More information about the llvm-commits
mailing list