[llvm] c4a60c9 - [CodeGen][ShrinkWrap] Enable PostShrinkWrap by default
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 25 01:27:26 PDT 2023
Author: sgokhale
Date: 2023-05-25T13:56:29+05:30
New Revision: c4a60c9d34375e73fc2da5e02215eabe4bc90e8f
URL: https://github.com/llvm/llvm-project/commit/c4a60c9d34375e73fc2da5e02215eabe4bc90e8f
DIFF: https://github.com/llvm/llvm-project/commit/c4a60c9d34375e73fc2da5e02215eabe4bc90e8f.diff
LOG: [CodeGen][ShrinkWrap] Enable PostShrinkWrap by default
This is an attempt to reland D42600 and enable this optimisation by default.
This also resolves the issue pointed out in the context of PGO builds.
Differential Revision: https://reviews.llvm.org/D42600
Added:
llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir
Modified:
llvm/lib/CodeGen/ShrinkWrap.cpp
llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
llvm/test/CodeGen/AArch64/ragreedy-csr.ll
llvm/test/CodeGen/AArch64/taildup-cfi.ll
llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
llvm/test/CodeGen/ARM/code-placement.ll
llvm/test/CodeGen/ARM/mbp.ll
llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
llvm/test/CodeGen/PowerPC/common-chain.ll
llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
llvm/test/CodeGen/PowerPC/shrink-wrap.ll
llvm/test/CodeGen/PowerPC/shrink-wrap.mir
llvm/test/CodeGen/RISCV/aext-to-sext.ll
llvm/test/CodeGen/RISCV/fli-licm.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll
llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
llvm/test/CodeGen/X86/fold-call-3.ll
llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
llvm/test/CodeGen/X86/pr44412.ll
llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index ca74c8ca13bbf..6dd9a81b7f84b 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -99,7 +99,7 @@ static cl::opt<cl::boolOrDefault>
EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
cl::desc("enable the shrink-wrapping pass"));
static cl::opt<bool> EnablePostShrinkWrapOpt(
- "enable-shrink-wrap-region-split", cl::init(false), cl::Hidden,
+ "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden,
cl::desc("enable splitting of the restore block if possible"));
namespace {
@@ -635,7 +635,10 @@ bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);
while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
- EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency()))
+ EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency() ||
+ /*Entry freq has been observed more than a loop block in
+ some cases*/
+ MLI->getLoopFor(NewSave)))
NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT,
false);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 49a15528c041a..8dd4da1ee4401 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -424,8 +424,8 @@ define i16 @red_mla_dup_ext_u8_s8_s16(i8* noalias nocapture noundef readonly %A,
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: b .LBB5_7
; CHECK-NEXT: .LBB5_3:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB5_9
; CHECK-NEXT: .LBB5_4: // %vector.ph
; CHECK-NEXT: and x11, x10, #0xfffffff0
; CHECK-NEXT: add x8, x0, #8
diff --git a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
index f919fa39021a2..b9086f4f5646f 100644
--- a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
+++ b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
@@ -6,13 +6,12 @@
; RUN: llc -x=mir -simplify-mir -run-pass=shrink-wrap -o - %s | FileCheck %s
; CHECK: name: compiler_pop_stack
; CHECK: frameInfo:
- ; CHECK-NOT: savePoint:
- ; CHECK-NOT: restorePoint:
+ ; CHECK: savePoint: '%bb.1'
+ ; CHECK: restorePoint: '%bb.7'
; CHECK: name: compiler_pop_stack_no_memoperands
; CHECK: frameInfo:
- ; CHECK-NOT: savePoint:
- ; CHECK-NOT: restorePoint:
- ; CHECK: stack:
+ ; CHECK: savePoint: '%bb.1'
+ ; CHECK: restorePoint: '%bb.7'
; CHECK: name: f
; CHECK: frameInfo:
; CHECK: savePoint: '%bb.2'
diff --git a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
index 98c95c38bbb6b..99f01883dbfb1 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
@@ -21,16 +21,16 @@ declare i32 @__maskrune(i32, i64) #7
define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly %b) #9 {
; CHECK-LABEL: prune_match:
; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ldrh w8, [x0]
+; CHECK-NEXT: ldrh w9, [x1]
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: b.ne LBB0_47
+; CHECK-NEXT: ; %bb.1: ; %if.end
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne LBB0_42
-; CHECK-NEXT: ; %bb.1: ; %if.end
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x14, __DefaultRuneLocale@GOTPAGE
; CHECK-NEXT: mov x9, xzr
@@ -243,7 +243,7 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
; CHECK-NEXT: b.eq LBB0_37
; CHECK-NEXT: LBB0_42:
; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: LBB0_43: ; %return
+; CHECK-NEXT: LBB0_43:
; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
@@ -259,6 +259,12 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
; CHECK-NEXT: ; %bb.46: ; %land.lhs.true52
; CHECK-NEXT: cbz w8, LBB0_43
; CHECK-NEXT: b LBB0_12
+; CHECK-NEXT: LBB0_47:
+; CHECK-NEXT: .cfi_def_cfa wsp, 0
+; CHECK-NEXT: .cfi_same_value w30
+; CHECK-NEXT: .cfi_same_value w29
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
; CHECK-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
; CHECK-NEXT: .loh AdrpLdrGot Lloh4, Lloh5
diff --git a/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir b/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir
new file mode 100644
index 0000000000000..5b43dde0ae250
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir
@@ -0,0 +1,760 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=aarch64 -run-pass=shrink-wrap -o - %s | FileCheck %s
+
+--- |
+ define void @shrink_test1(i32 %a) {
+ entry:
+ %cmp5 = icmp sgt i32 %a, 0
+ br i1 %cmp5, label %BB0, label %exit
+
+ BB0: ; preds = %entry
+ %call = call i32 @fun()
+ %c = icmp eq i32 %call, 0
+ br i1 %c, label %BB1, label %exit
+
+ BB1: ; preds = %BB0
+ %call2 = call i32 @fun()
+ br label %exit
+
+ exit: ; preds = %BB1, %BB0, %entry
+ ret void
+ }
+
+ define void @shrink_test2(i32 %a, ptr %P1, ptr %P2) {
+ BB00:
+ %cmp5 = icmp sgt i32 %a, 0
+ br i1 %cmp5, label %BB01, label %exit
+
+ BB01: ; preds = %BB00
+ store i32 %a, ptr %P1, align 4
+ %c1 = icmp sgt i32 %a, 1
+ br i1 %c1, label %BB02, label %BB03
+
+ BB02: ; preds = %BB01
+ store i32 %a, ptr %P2, align 4
+ br label %BB03
+
+ BB03: ; preds = %BB02, %BB01
+ %call03 = call i32 @fun()
+ %c03 = icmp eq i32 %call03, 0
+ br i1 %c03, label %BB04, label %BB05
+
+ BB04: ; preds = %BB03
+ %call04 = call i32 @fun()
+ br label %BB05
+
+ BB05: ; preds = %BB04, %BB03
+ %call05 = call i32 @fun()
+ %c05 = icmp eq i32 %call05, 0
+ br i1 %c05, label %BB06, label %BB07
+
+ BB06: ; preds = %BB05
+ %call06 = call i32 @fun()
+ br label %exit
+
+ BB07: ; preds = %BB05
+ %call07 = call i32 @fun2()
+ br label %exit
+
+ exit: ; preds = %BB07, %BB06, %BB00
+ ret void
+ }
+
+ define void @noshrink_test1(i32 %a, i32 %v, i32 %v2) {
+ entry:
+ %cmp5 = icmp sgt i32 %a, 0
+ br i1 %cmp5, label %BB0, label %exit
+
+ BB0: ; preds = %entry
+ %c = icmp eq i32 %a, 10
+ %c1 = icmp eq i32 %v, 10
+ %or.cond = select i1 %c, i1 %c1, i1 false
+ br i1 %or.cond, label %BB3, label %BB2
+
+ BB2: ; preds = %BB0
+ %c2 = icmp eq i32 %v2, 10
+ br i1 %c2, label %BB4, label %exit
+
+ BB3: ; preds = %BB0
+ %call3 = call i32 @fun()
+ br label %exit
+
+ BB4: ; preds = %BB2
+ %call4 = call i32 @fun2()
+ br label %exit
+
+ exit: ; preds = %BB4, %BB3, %BB2, %entry
+ ret void
+ }
+
+ define void @noshrink_test2(i32 %a) {
+ BB00:
+ %cmp5 = icmp sgt i32 %a, 0
+ br i1 %cmp5, label %BB01, label %InfLoop.preheader
+
+ InfLoop.preheader: ; preds = %BB00
+ br label %InfLoop
+
+ BB01: ; preds = %BB00
+ %call = call i32 @fun()
+ %c = icmp eq i32 %call, 0
+ br i1 %c, label %BB02, label %exit
+
+ BB02: ; preds = %BB01
+ %call2 = call i32 @fun()
+ br label %exit
+
+ InfLoop: ; preds = %InfLoop.preheader, %InfLoop
+ %call3 = call i32 @fun()
+ br label %InfLoop
+
+ exit: ; preds = %BB02, %BB01
+ ret void
+ }
+
+ define void @noshrink_test3(i32 %a) {
+ BB00:
+ %cmp5 = icmp sgt i32 %a, 0
+ %call02 = call i32 @fun()
+ br i1 %cmp5, label %BB02, label %BB01
+
+ BB01: ; preds = %BB00
+ %0 = icmp eq i32 %call02, 0
+ br i1 %0, label %BB01.1, label %exit
+
+ BB01.1: ; preds = %BB01
+ call void @abort() #0
+ unreachable
+
+ BB02: ; preds = %BB00
+ %1 = icmp eq i32 %call02, 0
+ br i1 %1, label %BB03, label %BB04
+
+ BB03: ; preds = %BB02
+ %call03 = call i32 @fun()
+ %c03 = icmp eq i32 %call03, 0
+ br i1 %c03, label %BB04, label %exit
+
+ BB04: ; preds = %BB03, %BB02
+ %call04 = call i32 @fun()
+ br label %exit
+
+ exit: ; preds = %BB04, %BB03, %BB01
+ ret void
+ }
+
+ define void @noshrink_bb_as_inlineasmbr_target(i1 %cond) {
+ entry:
+ br i1 %cond, label %0, label %exit
+
+ 0: ; preds = %entry
+ callbr void asm sideeffect "", "!i,~{flags}"()
+ to label %1 [label %exit]
+
+ 1: ; preds = %0
+ call void @dosomething()
+ br label %exit
+
+ exit: ; preds = %1, %0, %entry
+ ret void
+ }
+
+ declare i32 @fun()
+ declare i32 @fun2()
+ declare void @abort()
+ declare void @dosomething()
+...
+---
+name: shrink_test1
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+liveins:
+ - { reg: '$w0' }
+frameInfo:
+ maxAlignment: 1
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: shrink_test1
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.3(0x30000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 11, %bb.3, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.BB0:
+ ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.4(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.4
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.BB1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.exit:
+ ; CHECK-NEXT: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: B %bb.3
+ bb.0.entry:
+ successors: %bb.1(0x50000000), %bb.3(0x30000000)
+ liveins: $w0
+
+ dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
+ Bcc 11, %bb.3, implicit killed $nzcv
+ B %bb.1
+
+ bb.1.BB0:
+ successors: %bb.2(0x30000000), %bb.3(0x50000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ CBNZW killed renamable $w0, %bb.3
+ B %bb.2
+
+ bb.2.BB1:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.3.exit:
+ RET_ReallyLR
+
+...
+---
+name: shrink_test2
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+frameInfo:
+ maxAlignment: 1
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: shrink_test2
+ ; CHECK: bb.0.BB00:
+ ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.8(0x30000000)
+ ; CHECK-NEXT: liveins: $w0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 11, %bb.8, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.BB01:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
+ ; CHECK-NEXT: STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1)
+ ; CHECK-NEXT: Bcc 11, %bb.3, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.BB02:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.BB03:
+ ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.5(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.BB04:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.BB05:
+ ; CHECK-NEXT: successors: %bb.6(0x30000000), %bb.7(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.7
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6.BB06:
+ ; CHECK-NEXT: successors: %bb.9(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7.BB07:
+ ; CHECK-NEXT: successors: %bb.9(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8.exit:
+ ; CHECK-NEXT: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.9:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: B %bb.8
+ bb.0.BB00:
+ successors: %bb.1(0x50000000), %bb.8(0x30000000)
+ liveins: $w0, $x1, $x2
+
+ dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ Bcc 11, %bb.8, implicit killed $nzcv
+ B %bb.1
+
+ bb.1.BB01:
+ successors: %bb.2, %bb.3
+ liveins: $w0, $x1, $x2
+
+ dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
+ STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1)
+ Bcc 11, %bb.3, implicit killed $nzcv
+ B %bb.2
+
+ bb.2.BB02:
+ liveins: $w0, $x2
+
+ STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2)
+
+ bb.3.BB03:
+ successors: %bb.4(0x30000000), %bb.5(0x50000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ CBNZW killed renamable $w0, %bb.5
+ B %bb.4
+
+ bb.4.BB04:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.5.BB05:
+ successors: %bb.6(0x30000000), %bb.7(0x50000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ CBNZW killed renamable $w0, %bb.7
+ B %bb.6
+
+ bb.6.BB06:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ B %bb.8
+
+ bb.7.BB07:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.8.exit:
+ RET_ReallyLR
+
+...
+---
+name: noshrink_test1
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$w1' }
+ - { reg: '$w2' }
+frameInfo:
+ maxAlignment: 1
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: noshrink_test1
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.6(0x30000000)
+ ; CHECK-NEXT: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 11, %bb.6, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.BB0:
+ ; CHECK-NEXT: successors: %bb.2(0x60000000), %bb.3(0x20000000)
+ ; CHECK-NEXT: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.BB0:
+ ; CHECK-NEXT: successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab)
+ ; CHECK-NEXT: liveins: $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.BB2:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: liveins: $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.BB3:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.BB4:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6.exit:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0.entry:
+ successors: %bb.1(0x50000000), %bb.6(0x30000000)
+ liveins: $w0, $w1, $w2
+
+ dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ Bcc 11, %bb.6, implicit killed $nzcv
+ B %bb.1
+
+ bb.1.BB0:
+ successors: %bb.2(0x60000000), %bb.3(0x20000000)
+ liveins: $w0, $w1, $w2
+
+ dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv
+ Bcc 1, %bb.3, implicit killed $nzcv
+ B %bb.2
+
+ bb.2.BB0:
+ successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab)
+ liveins: $w1, $w2
+
+ dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv
+ Bcc 0, %bb.4, implicit killed $nzcv
+ B %bb.3
+
+ bb.3.BB2:
+ liveins: $w2
+
+ dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv
+ Bcc 0, %bb.5, implicit killed $nzcv
+ B %bb.6
+
+ bb.4.BB3:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ B %bb.6
+
+ bb.5.BB4:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.6.exit:
+ RET_ReallyLR
+
+...
+---
+name: noshrink_test2
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+liveins:
+ - { reg: '$w0' }
+frameInfo:
+ maxAlignment: 1
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: noshrink_test2
+ ; CHECK: bb.0.BB00:
+ ; CHECK-NEXT: successors: %bb.2(0x50000000), %bb.1(0x30000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 12, %bb.2, implicit killed $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.BB01:
+ ; CHECK-NEXT: successors: %bb.3(0x30000000), %bb.5(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.BB02:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.InfLoop:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.exit:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0.BB00:
+ successors: %bb.2(0x50000000), %bb.1(0x30000000)
+ liveins: $w0
+
+ dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv
+ Bcc 12, %bb.2, implicit killed $nzcv
+
+ bb.1:
+ B %bb.4
+
+ bb.2.BB01:
+ successors: %bb.3(0x30000000), %bb.5(0x50000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ CBNZW killed renamable $w0, %bb.5
+ B %bb.3
+
+ bb.3.BB02:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ B %bb.5
+
+ bb.4.InfLoop:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ B %bb.4
+
+ bb.5.exit:
+ RET_ReallyLR
+
+...
+---
+name: noshrink_test3
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+liveins:
+ - { reg: '$w0' }
+frameInfo:
+ maxAlignment: 1
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: noshrink_test3
+ ; CHECK: bb.0.BB00:
+ ; CHECK-NEXT: successors: %bb.3(0x50000000), %bb.1(0x30000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $w19 = COPY $w0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 12, %bb.3, implicit killed $nzcv
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.BB01:
+ ; CHECK-NEXT: successors: %bb.2(0x00000800), %bb.6(0x7ffff800)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.6
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.BB01.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.BB02:
+ ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.5(0x50000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.BB03:
+ ; CHECK-NEXT: successors: %bb.5(0x30000000), %bb.6(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.6
+ ; CHECK-NEXT: B %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.BB04:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6.exit:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0.BB00:
+ successors: %bb.3(0x50000000), %bb.1(0x30000000)
+ liveins: $w0
+
+ renamable $w19 = COPY $w0
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv
+ Bcc 12, %bb.3, implicit killed $nzcv
+ B %bb.1
+
+ bb.1.BB01:
+ successors: %bb.2(0x00000800), %bb.6(0x7ffff800)
+ liveins: $w0
+
+ CBNZW killed renamable $w0, %bb.6
+ B %bb.2
+
+ bb.2.BB01.1:
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.3.BB02:
+ successors: %bb.4(0x30000000), %bb.5(0x50000000)
+ liveins: $w0
+
+ CBNZW killed renamable $w0, %bb.5
+ B %bb.4
+
+ bb.4.BB03:
+ successors: %bb.5(0x30000000), %bb.6(0x50000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ CBNZW killed renamable $w0, %bb.6
+ B %bb.5
+
+ bb.5.BB04:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.6.exit:
+ RET_ReallyLR
+
+...
+---
+name: noshrink_bb_as_inlineasmbr_target
+registers: []
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+ savePoint: ''
+ restorePoint: ''
+body: |
+ ; CHECK-LABEL: name: noshrink_bb_as_inlineasmbr_target
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: TBZW killed renamable $w0, 0, %bb.3
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.3(0x00000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.3
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2 (%ir-block.1):
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL @dosomething, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.exit (machine-block-address-taken, inlineasm-br-indirect-target):
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ liveins: $w0
+
+ TBZW killed renamable $w0, 0, %bb.3
+ B %bb.1
+
+ bb.1 (%ir-block.0):
+ successors: %bb.2(0x80000000), %bb.3(0x00000000)
+
+ INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.3
+ B %bb.2
+
+ bb.2 (%ir-block.1):
+ successors: %bb.3(0x80000000)
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @dosomething, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.3.exit (machine-block-address-taken, inlineasm-br-indirect-target):
+ RET_ReallyLR
+
+...
diff --git a/llvm/test/CodeGen/AArch64/taildup-cfi.ll b/llvm/test/CodeGen/AArch64/taildup-cfi.ll
index 221503009cdb6..4a87ceefbcf03 100644
--- a/llvm/test/CodeGen/AArch64/taildup-cfi.ll
+++ b/llvm/test/CodeGen/AArch64/taildup-cfi.ll
@@ -32,7 +32,7 @@ if.then: ; preds = %entry
store i32 0, ptr @f, align 4, !tbaa !2
br label %if.end
-; DARWIN-NOT: Merging into block
+; DARWIN: Merging into block
; LINUX: Merging into block
if.end: ; preds = %entry.if.end_crit_edge, %if.then
diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
index 050696ad653eb..e45985136cf34 100644
--- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
+++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
@@ -5,11 +5,11 @@
define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LE-LABEL: add_user:
; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: .save {r4, lr}
-; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: cmp r0, #1
; CHECK-LE-NEXT: blt .LBB0_4
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-LE-NEXT: .save {r4, lr}
+; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: sub.w lr, r3, #2
; CHECK-LE-NEXT: subs r2, #2
; CHECK-LE-NEXT: mov.w r12, #0
@@ -22,22 +22,23 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
; CHECK-LE-NEXT: sxtah r1, r1, r3
; CHECK-LE-NEXT: smlad r12, r4, r3, r12
; CHECK-LE-NEXT: bne .LBB0_2
-; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-LE-NEXT: @ %bb.3:
+; CHECK-LE-NEXT: pop.w {r4, lr}
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB0_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: add_user:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
-; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: cmp r0, #1
; CHECK-BE-NEXT: blt .LBB0_4
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: subs r3, #2
; CHECK-BE-NEXT: subs r2, #2
; CHECK-BE-NEXT: mov.w r12, #0
@@ -53,14 +54,15 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-BE-NEXT: smlabb r12, r5, r4, r12
; CHECK-BE-NEXT: bne .LBB0_2
-; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-BE-NEXT: @ %bb.3:
+; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr}
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB0_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
entry:
%cmp24 = icmp sgt i32 %arg, 0
br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
@@ -105,11 +107,11 @@ for.body:
define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LE-LABEL: mul_bottom_user:
; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: .save {r4, lr}
-; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: cmp r0, #1
; CHECK-LE-NEXT: blt .LBB1_4
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-LE-NEXT: .save {r4, lr}
+; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: sub.w lr, r3, #2
; CHECK-LE-NEXT: subs r2, #2
; CHECK-LE-NEXT: mov.w r12, #0
@@ -123,22 +125,23 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur
; CHECK-LE-NEXT: sxth r3, r3
; CHECK-LE-NEXT: mul r1, r3, r1
; CHECK-LE-NEXT: bne .LBB1_2
-; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-LE-NEXT: @ %bb.3:
+; CHECK-LE-NEXT: pop.w {r4, lr}
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB1_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: mul_bottom_user:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
-; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: cmp r0, #1
; CHECK-BE-NEXT: blt .LBB1_4
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: subs r3, #2
; CHECK-BE-NEXT: subs r2, #2
; CHECK-BE-NEXT: mov.w r12, #0
@@ -154,14 +157,15 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur
; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-BE-NEXT: smlabb r12, r5, r4, r12
; CHECK-BE-NEXT: bne .LBB1_2
-; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-BE-NEXT: @ %bb.3:
+; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr}
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB1_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
entry:
%cmp24 = icmp sgt i32 %arg, 0
br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
@@ -206,11 +210,11 @@ for.body:
define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LE-LABEL: mul_top_user:
; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: .save {r4, lr}
-; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: cmp r0, #1
; CHECK-LE-NEXT: blt .LBB2_4
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-LE-NEXT: .save {r4, lr}
+; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: subs r3, #2
; CHECK-LE-NEXT: subs r2, #2
; CHECK-LE-NEXT: mov.w r12, #0
@@ -224,22 +228,23 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r
; CHECK-LE-NEXT: asr.w r4, r4, #16
; CHECK-LE-NEXT: mul r1, r4, r1
; CHECK-LE-NEXT: bne .LBB2_2
-; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-LE-NEXT: @ %bb.3:
+; CHECK-LE-NEXT: pop.w {r4, lr}
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB2_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: mul_top_user:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, lr}
-; CHECK-BE-NEXT: push {r4, lr}
; CHECK-BE-NEXT: cmp r0, #1
; CHECK-BE-NEXT: blt .LBB2_4
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-BE-NEXT: .save {r4, lr}
+; CHECK-BE-NEXT: push {r4, lr}
; CHECK-BE-NEXT: subs r3, #2
; CHECK-BE-NEXT: subs r2, #2
; CHECK-BE-NEXT: mov.w r12, #0
@@ -255,14 +260,15 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r
; CHECK-BE-NEXT: mul r1, r4, r1
; CHECK-BE-NEXT: smlabb r12, r4, lr, r12
; CHECK-BE-NEXT: bne .LBB2_2
-; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-BE-NEXT: @ %bb.3:
+; CHECK-BE-NEXT: pop.w {r4, lr}
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, pc}
+; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB2_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, pc}
+; CHECK-BE-NEXT: bx lr
entry:
%cmp24 = icmp sgt i32 %arg, 0
br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
@@ -307,11 +313,11 @@ for.body:
define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LE-LABEL: and_user:
; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: .save {r4, lr}
-; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: cmp r0, #1
; CHECK-LE-NEXT: blt .LBB3_4
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-LE-NEXT: .save {r4, lr}
+; CHECK-LE-NEXT: push {r4, lr}
; CHECK-LE-NEXT: sub.w lr, r3, #2
; CHECK-LE-NEXT: subs r2, #2
; CHECK-LE-NEXT: mov.w r12, #0
@@ -325,22 +331,23 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
; CHECK-LE-NEXT: uxth r3, r3
; CHECK-LE-NEXT: mul r1, r3, r1
; CHECK-LE-NEXT: bne .LBB3_2
-; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-LE-NEXT: @ %bb.3:
+; CHECK-LE-NEXT: pop.w {r4, lr}
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB3_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: add.w r0, r12, r1
-; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: and_user:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
-; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: cmp r0, #1
; CHECK-BE-NEXT: blt .LBB3_4
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: subs r3, #2
; CHECK-BE-NEXT: subs r2, #2
; CHECK-BE-NEXT: mov.w r12, #0
@@ -356,14 +363,15 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-BE-NEXT: smlabb r12, r5, r4, r12
; CHECK-BE-NEXT: bne .LBB3_2
-; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-BE-NEXT: @ %bb.3:
+; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr}
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB3_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-BE-NEXT: bx lr
entry:
%cmp24 = icmp sgt i32 %arg, 0
br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll
index 7755ff53512ef..01d72f134aacb 100644
--- a/llvm/test/CodeGen/ARM/code-placement.ll
+++ b/llvm/test/CodeGen/ARM/code-placement.ll
@@ -11,7 +11,6 @@ entry:
br i1 %0, label %bb2, label %bb
bb:
-; CHECK: LBB0_1:
; CHECK: LBB0_[[LABEL:[0-9]]]:
; CHECK: bne LBB0_[[LABEL]]
; CHECK-NOT: b LBB0_[[LABEL]]
diff --git a/llvm/test/CodeGen/ARM/mbp.ll b/llvm/test/CodeGen/ARM/mbp.ll
index e7ab3860b52ac..4f96029e06b95 100644
--- a/llvm/test/CodeGen/ARM/mbp.ll
+++ b/llvm/test/CodeGen/ARM/mbp.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7-unknown-linux-gnueabihf"
@@ -6,16 +7,50 @@ target triple = "thumbv7-unknown-linux-gnueabihf"
%List = type { i32, ptr }
; The entry block should be the first block of the function.
-; CHECK-LABEL: foo
-; CHECK: %entry
-; CHECK: %for.body
-; CHECK: %for.inc
-; CHECK: %if.then
-; CHECK: %for.cond.i
-; CHECK: %for.body.i
-; CHECK: %return
define i1 @foo(ptr %ha, i32 %he) !prof !39 {
+; CHECK-LABEL: foo:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: ldr r2, [r0]
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: itt eq
+; CHECK-NEXT: moveq r0, #0
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB0_1: @ %for.body.preheader
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: .LBB0_2: @ %for.inc
+; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: ldr r2, [r2]
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it eq
+; CHECK-NEXT: popeq {r7, pc}
+; CHECK-NEXT: .LBB0_3: @ %for.body
+; CHECK-NEXT: @ =>This Loop Header: Depth=1
+; CHECK-NEXT: @ Child Loop BB0_5 Depth 2
+; CHECK-NEXT: ldr r0, [r2, #4]
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: beq .LBB0_2
+; CHECK-NEXT: @ %bb.4: @ %if.then
+; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: ldrd r3, r0, [r0]
+; CHECK-NEXT: sub.w r12, r0, #4
+; CHECK-NEXT: .LBB0_5: @ %for.cond.i
+; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1
+; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT: cmp r3, #1
+; CHECK-NEXT: blt .LBB0_2
+; CHECK-NEXT: @ %bb.6: @ %for.body.i
+; CHECK-NEXT: @ in Loop: Header=BB0_5 Depth=2
+; CHECK-NEXT: ldr.w lr, [r12, r3, lsl #2]
+; CHECK-NEXT: subs r3, #1
+; CHECK-NEXT: movs r0, #1
+; CHECK-NEXT: cmp lr, r1
+; CHECK-NEXT: bne .LBB0_5
+; CHECK-NEXT: @ %bb.7:
+; CHECK-NEXT: pop {r7, pc}
entry:
%TargetPtr = load ptr, ptr %ha, align 4
%cmp1 = icmp eq ptr %TargetPtr, null
diff --git a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
index 2755d354a6244..c9724674afd82 100644
--- a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
+++ b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
@@ -6,11 +6,11 @@
define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
; CHECK-LABEL: ssat_unroll:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB0_1: @ %while.body.preheader
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq .LBB0_5
-; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
; CHECK-NEXT: sub r12, r3, #1
; CHECK-NEXT: tst r3, #1
; CHECK-NEXT: beq .LBB0_3
@@ -23,7 +23,7 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
; CHECK-NEXT: mov r3, r12
; CHECK-NEXT: .LBB0_3: @ %while.body.prol.loopexit
; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: popeq {r11, pc}
+; CHECK-NEXT: beq .LBB0_5
; CHECK-NEXT: .LBB0_4: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh r12, [r0]
@@ -41,8 +41,9 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
; CHECK-NEXT: strh r12, [r2, #2]
; CHECK-NEXT: add r2, r2, #4
; CHECK-NEXT: bne .LBB0_4
-; CHECK-NEXT: .LBB0_5: @ %while.end
-; CHECK-NEXT: pop {r11, pc}
+; CHECK-NEXT: .LBB0_5:
+; CHECK-NEXT: pop {r11, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp.not7 = icmp eq i32 %blockSize, 0
br i1 %cmp.not7, label %while.end, label %while.body.preheader
@@ -125,11 +126,11 @@ while.end: ; preds = %while.body, %while.
define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture writeonly %pDst, i32 %blockSize) {
; CHECK-LABEL: ssat_unroll_minmax:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB1_1: @ %while.body.preheader
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq .LBB1_5
-; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
; CHECK-NEXT: sub r12, r3, #1
; CHECK-NEXT: tst r3, #1
; CHECK-NEXT: beq .LBB1_3
@@ -142,7 +143,7 @@ define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture rea
; CHECK-NEXT: mov r3, r12
; CHECK-NEXT: .LBB1_3: @ %while.body.prol.loopexit
; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: popeq {r11, pc}
+; CHECK-NEXT: beq .LBB1_5
; CHECK-NEXT: .LBB1_4: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh r12, [r0]
@@ -160,8 +161,9 @@ define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture rea
; CHECK-NEXT: strh r12, [r2, #2]
; CHECK-NEXT: add r2, r2, #4
; CHECK-NEXT: bne .LBB1_4
-; CHECK-NEXT: .LBB1_5: @ %while.end
-; CHECK-NEXT: pop {r11, pc}
+; CHECK-NEXT: .LBB1_5:
+; CHECK-NEXT: pop {r11, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp.not7 = icmp eq i32 %blockSize, 0
br i1 %cmp.not7, label %while.end, label %while.body.preheader
diff --git a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
index 0cf7119eab84c..35ddcfd9ba6d6 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
@@ -39,19 +39,19 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmplwi r6, 0
; CHECK-NEXT: cmpwi cr1, r6, 0
-; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill
-; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill
; CHECK-NEXT: crandc 4*cr5+lt, 4*cr1+lt, eq
; CHECK-NEXT: cmpwi cr1, r7, 0
-; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_5
+; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+eq
-; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_5
+; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6
; CHECK-NEXT: # %bb.2: # %for.body.preheader
; CHECK-NEXT: slwi r8, r4, 1
; CHECK-NEXT: li r10, 0
; CHECK-NEXT: li r11, 0
+; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill
; CHECK-NEXT: add r8, r4, r8
+; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill
; CHECK-NEXT: add r9, r5, r8
; CHECK-NEXT: add r5, r5, r4
; CHECK-NEXT: add r8, r3, r5
@@ -83,15 +83,15 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64
; CHECK-NEXT: #
; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt
; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3
-; CHECK-NEXT: b L..BB0_6
-; CHECK-NEXT: L..BB0_5:
-; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: li r5, 0
-; CHECK-NEXT: L..BB0_6: # %for.cond.cleanup
+; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload
; CHECK-NEXT: lwz r30, -8(r1) # 4-byte Folded Reload
; CHECK-NEXT: mr r4, r5
; CHECK-NEXT: blr
+; CHECK-NEXT: L..BB0_6:
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: li r4, 0
+; CHECK-NEXT: blr
entry:
%add = add nsw i32 %base1, %offset
%mul = shl nsw i32 %offset, 1
diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll
index ea8a72e7d11e1..5f8c21e30f8fd 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -137,14 +137,14 @@ define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %bas
; CHECK-LABEL: not_perfect_chain_all_same_offset_fail:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpdi r6, 0
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: ble cr0, .LBB1_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: sldi r7, r4, 1
-; CHECK-NEXT: sldi r9, r4, 2
; CHECK-NEXT: add r5, r3, r5
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: add r8, r4, r7
+; CHECK-NEXT: sldi r9, r4, 2
; CHECK-NEXT: mtctr r6
; CHECK-NEXT: add r10, r4, r9
; CHECK-NEXT: .p2align 4
@@ -161,12 +161,11 @@ define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %bas
; CHECK-NEXT: mulld r6, r6, r0
; CHECK-NEXT: maddld r3, r6, r30, r3
; CHECK-NEXT: bdnz .LBB1_2
-; CHECK-NEXT: # %bb.3: # %for.cond.cleanup
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
entry:
%mul = shl nsw i64 %offset, 1
@@ -425,20 +424,20 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) {
; CHECK-LABEL: not_same_offset_fail:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpdi r6, 0
+; CHECK-NEXT: ble cr0, .LBB4_4
+; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT: add r5, r3, r5
+; CHECK-NEXT: li r3, 0
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT: ble cr0, .LBB4_3
-; CHECK-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-NEXT: mtctr r6
; CHECK-NEXT: mulli r11, r4, 10
; CHECK-NEXT: sldi r8, r4, 2
-; CHECK-NEXT: add r5, r3, r5
-; CHECK-NEXT: li r3, 0
; CHECK-NEXT: add r8, r4, r8
; CHECK-NEXT: sldi r9, r4, 3
-; CHECK-NEXT: mtctr r6
-; CHECK-NEXT: sldi r7, r4, 1
; CHECK-NEXT: sub r10, r9, r4
+; CHECK-NEXT: sldi r7, r4, 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB4_2: # %for.body
; CHECK-NEXT: #
@@ -455,14 +454,14 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) {
; CHECK-NEXT: mulld r6, r6, r29
; CHECK-NEXT: maddld r3, r6, r28, r3
; CHECK-NEXT: bdnz .LBB4_2
-; CHECK-NEXT: b .LBB4_4
-; CHECK-NEXT: .LBB4_3:
-; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: .LBB4_4: # %for.cond.cleanup
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB4_4:
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: blr
entry:
%mul = shl nsw i64 %offset, 1
%mul2 = mul nsw i64 %offset, 5
diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
index 769b358131e9a..37baef6043884 100644
--- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
@@ -192,21 +192,21 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
; CHECK-LABEL: test_max_number_reminder:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: cmplwi r4, 0
-; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT: beq cr0, .LBB2_3
+; CHECK-NEXT: beq cr0, .LBB2_4
; CHECK-NEXT: # %bb.1: # %bb3.preheader
; CHECK-NEXT: cmpldi r4, 1
; CHECK-NEXT: li r5, 1
; CHECK-NEXT: addi r9, r3, 4002
+; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill
; CHECK-NEXT: li r6, -1
+; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill
; CHECK-NEXT: li r7, 3
; CHECK-NEXT: li r8, 5
; CHECK-NEXT: li r10, 9
+; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: iselgt r3, r4, r5
; CHECK-NEXT: mtctr r3
; CHECK-NEXT: li r3, 0
@@ -232,10 +232,7 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
; CHECK-NEXT: mulld r11, r11, r26
; CHECK-NEXT: maddld r3, r11, r25, r3
; CHECK-NEXT: bdnz .LBB2_2
-; CHECK-NEXT: b .LBB2_4
-; CHECK-NEXT: .LBB2_3:
-; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: .LBB2_4: # %bb45
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
@@ -244,6 +241,9 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT: addi r3, r4, 0
+; CHECK-NEXT: blr
bb:
%i = sext i32 %arg1 to i64
%i2 = icmp eq i32 %arg1, 0
@@ -475,11 +475,11 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext %
; CHECK-LABEL: test_ds_multiple_chains:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: cmplwi r5, 0
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT: beq cr0, .LBB5_3
+; CHECK-NEXT: beq cr0, .LBB5_4
; CHECK-NEXT: # %bb.1: # %bb4.preheader
; CHECK-NEXT: cmpldi r5, 1
; CHECK-NEXT: li r6, 1
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: addi r3, r3, 4001
; CHECK-NEXT: addi r4, r4, 4001
; CHECK-NEXT: li r7, 9
@@ -507,13 +507,13 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext %
; CHECK-NEXT: mulld r8, r8, r30
; CHECK-NEXT: maddld r6, r8, r9, r6
; CHECK-NEXT: bdnz .LBB5_2
-; CHECK-NEXT: b .LBB5_4
-; CHECK-NEXT: .LBB5_3:
-; CHECK-NEXT: li r6, 0
-; CHECK-NEXT: .LBB5_4: # %bb43
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r3, r6, r5
; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB5_4:
+; CHECK-NEXT: addi r3, r5, 0
+; CHECK-NEXT: blr
bb:
%i = sext i32 %arg2 to i64
%i3 = icmp eq i32 %arg2, 0
@@ -595,17 +595,17 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
; CHECK-LABEL: test_ds_cross_basic_blocks:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: cmplwi r4, 0
-; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT: beq cr0, .LBB6_8
+; CHECK-NEXT: beq cr0, .LBB6_9
; CHECK-NEXT: # %bb.1: # %bb3
; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
; CHECK-NEXT: cmpldi r4, 1
; CHECK-NEXT: li r7, 1
; CHECK-NEXT: addi r6, r3, 4009
+; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-NEXT: ld r5, .LC0@toc@l(r5)
; CHECK-NEXT: iselgt r3, r4, r7
+; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: li r4, -7
; CHECK-NEXT: li r8, -6
; CHECK-NEXT: li r9, 1
@@ -634,7 +634,7 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
; CHECK-NEXT: mulld r0, r0, r10
; CHECK-NEXT: mulld r0, r0, r9
; CHECK-NEXT: maddld r3, r0, r7, r3
-; CHECK-NEXT: bdz .LBB6_9
+; CHECK-NEXT: bdz .LBB6_8
; CHECK-NEXT: .LBB6_4: # %bb5
; CHECK-NEXT: #
; CHECK-NEXT: lbzu r0, 1(r5)
@@ -666,12 +666,13 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
; CHECK-NEXT: add r7, r0, r7
; CHECK-NEXT: b .LBB6_3
; CHECK-NEXT: .LBB6_8:
-; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: .LBB6_9: # %bb64
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB6_9:
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: blr
bb:
%i = sext i32 %arg1 to i64
%i2 = icmp eq i32 %arg1, 0
diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
index b91f20b710a2d..79f2ef3e3746a 100644
--- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
@@ -6,24 +6,24 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpd 5, 7
-; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill
+; CHECK-NEXT: bgelr 0
+; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; CHECK-NEXT: addi 27, 5, 2
; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT: addi 28, 5, 3
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT: bge 0, .LBB0_6
-; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: addi 30, 5, 1
-; CHECK-NEXT: addi 28, 5, 3
-; CHECK-NEXT: addi 27, 5, 2
; CHECK-NEXT: mulld 12, 8, 5
-; CHECK-NEXT: addi 29, 3, 16
; CHECK-NEXT: mulld 0, 9, 8
+; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT: addi 29, 3, 16
; CHECK-NEXT: sldi 11, 10, 3
+; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill
; CHECK-NEXT: mulld 30, 8, 30
; CHECK-NEXT: mulld 28, 8, 28
; CHECK-NEXT: mulld 8, 8, 27
diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
index 08c391e34c6f4..12d0b056ca886 100644
--- a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
+++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
@@ -7,6 +7,9 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC64-LABEL: shrinkwrapme:
; POWERPC64: # %bb.0: # %entry
; POWERPC64-NEXT: cmpwi 4, 0
+; POWERPC64-NEXT: ble 0, .LBB0_4
+; POWERPC64-NEXT: # %bb.1: # %for.body.preheader
+; POWERPC64-NEXT: addi 4, 4, -1
; POWERPC64-NEXT: std 14, -144(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 15, -136(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 16, -128(1) # 8-byte Folded Spill
@@ -22,14 +25,11 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: clrldi 4, 4, 32
+; POWERPC64-NEXT: addi 4, 4, 1
; POWERPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: ble 0, .LBB0_3
-; POWERPC64-NEXT: # %bb.1: # %for.body.preheader
-; POWERPC64-NEXT: addi 4, 4, -1
-; POWERPC64-NEXT: clrldi 4, 4, 32
-; POWERPC64-NEXT: addi 4, 4, 1
; POWERPC64-NEXT: mtctr 4
; POWERPC64-NEXT: li 4, 0
; POWERPC64-NEXT: .p2align 4
@@ -39,10 +39,7 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC64-NEXT: add 4, 3, 4
; POWERPC64-NEXT: #NO_APP
; POWERPC64-NEXT: bdnz .LBB0_2
-; POWERPC64-NEXT: b .LBB0_4
-; POWERPC64-NEXT: .LBB0_3:
-; POWERPC64-NEXT: li 4, 0
-; POWERPC64-NEXT: .LBB0_4: # %for.cond.cleanup
+; POWERPC64-NEXT: # %bb.3:
; POWERPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload
; POWERPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; POWERPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload
@@ -63,10 +60,16 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC64-NEXT: ld 15, -136(1) # 8-byte Folded Reload
; POWERPC64-NEXT: ld 14, -144(1) # 8-byte Folded Reload
; POWERPC64-NEXT: blr
+; POWERPC64-NEXT: .LBB0_4:
+; POWERPC64-NEXT: li 4, 0
+; POWERPC64-NEXT: extsw 3, 4
+; POWERPC64-NEXT: blr
;
; POWERPC32-AIX-LABEL: shrinkwrapme:
; POWERPC32-AIX: # %bb.0: # %entry
; POWERPC32-AIX-NEXT: cmpwi 4, 0
+; POWERPC32-AIX-NEXT: ble 0, L..BB0_4
+; POWERPC32-AIX-NEXT: # %bb.1: # %for.body.preheader
; POWERPC32-AIX-NEXT: stw 14, -72(1) # 4-byte Folded Spill
; POWERPC32-AIX-NEXT: stw 15, -68(1) # 4-byte Folded Spill
; POWERPC32-AIX-NEXT: stw 16, -64(1) # 4-byte Folded Spill
@@ -85,8 +88,6 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC32-AIX-NEXT: stw 29, -12(1) # 4-byte Folded Spill
; POWERPC32-AIX-NEXT: stw 30, -8(1) # 4-byte Folded Spill
; POWERPC32-AIX-NEXT: stw 31, -4(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: ble 0, L..BB0_3
-; POWERPC32-AIX-NEXT: # %bb.1: # %for.body.preheader
; POWERPC32-AIX-NEXT: mtctr 4
; POWERPC32-AIX-NEXT: li 4, 0
; POWERPC32-AIX-NEXT: .align 4
@@ -96,10 +97,7 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC32-AIX-NEXT: add 4, 3, 4
; POWERPC32-AIX-NEXT: #NO_APP
; POWERPC32-AIX-NEXT: bdnz L..BB0_2
-; POWERPC32-AIX-NEXT: b L..BB0_4
-; POWERPC32-AIX-NEXT: L..BB0_3:
-; POWERPC32-AIX-NEXT: li 4, 0
-; POWERPC32-AIX-NEXT: L..BB0_4: # %for.cond.cleanup
+; POWERPC32-AIX-NEXT: # %bb.3:
; POWERPC32-AIX-NEXT: lwz 31, -4(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: lwz 30, -8(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: lwz 29, -12(1) # 4-byte Folded Reload
@@ -120,10 +118,16 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC32-AIX-NEXT: lwz 15, -68(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: lwz 14, -72(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: blr
+; POWERPC32-AIX-NEXT: L..BB0_4:
+; POWERPC32-AIX-NEXT: li 3, 0
+; POWERPC32-AIX-NEXT: blr
;
; POWERPC64-AIX-LABEL: shrinkwrapme:
; POWERPC64-AIX: # %bb.0: # %entry
; POWERPC64-AIX-NEXT: cmpwi 4, 1
+; POWERPC64-AIX-NEXT: blt 0, L..BB0_4
+; POWERPC64-AIX-NEXT: # %bb.1: # %for.body.preheader
+; POWERPC64-AIX-NEXT: addi 4, 4, -1
; POWERPC64-AIX-NEXT: std 14, -144(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 15, -136(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 16, -128(1) # 8-byte Folded Spill
@@ -139,14 +143,11 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC64-AIX-NEXT: std 26, -48(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 27, -40(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: clrldi 4, 4, 32
+; POWERPC64-AIX-NEXT: addi 4, 4, 1
; POWERPC64-AIX-NEXT: std 29, -24(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 30, -16(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 31, -8(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: blt 0, L..BB0_3
-; POWERPC64-AIX-NEXT: # %bb.1: # %for.body.preheader
-; POWERPC64-AIX-NEXT: addi 4, 4, -1
-; POWERPC64-AIX-NEXT: clrldi 4, 4, 32
-; POWERPC64-AIX-NEXT: addi 4, 4, 1
; POWERPC64-AIX-NEXT: mtctr 4
; POWERPC64-AIX-NEXT: li 4, 0
; POWERPC64-AIX-NEXT: .align 4
@@ -156,10 +157,7 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC64-AIX-NEXT: add 4, 3, 4
; POWERPC64-AIX-NEXT: #NO_APP
; POWERPC64-AIX-NEXT: bdnz L..BB0_2
-; POWERPC64-AIX-NEXT: b L..BB0_4
-; POWERPC64-AIX-NEXT: L..BB0_3:
-; POWERPC64-AIX-NEXT: li 4, 0
-; POWERPC64-AIX-NEXT: L..BB0_4: # %for.cond.cleanup
+; POWERPC64-AIX-NEXT: # %bb.3:
; POWERPC64-AIX-NEXT: ld 31, -8(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: ld 29, -24(1) # 8-byte Folded Reload
@@ -180,6 +178,10 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC64-AIX-NEXT: ld 15, -136(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: ld 14, -144(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: blr
+; POWERPC64-AIX-NEXT: L..BB0_4:
+; POWERPC64-AIX-NEXT: li 4, 0
+; POWERPC64-AIX-NEXT: extsw 3, 4
+; POWERPC64-AIX-NEXT: blr
entry:
%cmp5 = icmp sgt i32 %lim, 0
br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir
index 1b6ccb92527e7..561b193086bf5 100644
--- a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir
+++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir
@@ -48,42 +48,7 @@
...
---
name: shrinkwrapme
-alignment: 16
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
tracksRegLiveness: true
-hasWinCFI: false
-registers: []
-liveins:
- - { reg: '$x3', virtual-reg: '' }
- - { reg: '$x4', virtual-reg: '' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 0
- adjustsStack: false
- hasCalls: false
- stackProtector: ''
- maxCallFrameSize: 4294967295
- cvBytesOfCalleeSavedRegisters: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- localFrameSize: 0
- savePoint: ''
- restorePoint: ''
-fixedStack: []
-stack: []
-callSites: []
-constants: []
-machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: shrinkwrapme
; CHECK: bb.0.entry:
@@ -117,11 +82,17 @@ body: |
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.body:
- ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.3(0x04000000)
+ ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000)
; CHECK-NEXT: liveins: $r4, $x3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: INLINEASM &"add $0, $1, $2", 0 /* attdialect */, 131082 /* regdef:GPRC */, def renamable $r4, 131081 /* reguse:GPRC */, renamable $r3, 131081 /* reguse:GPRC */, killed renamable $r4, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15, 12 /* clobber */, implicit-def dead early-clobber $r16, 12 /* clobber */, implicit-def dead early-clobber $r17, 12 /* clobber */, implicit-def dead early-clobber $r18, 12 /* clobber */, implicit-def dead early-clobber $r19, 12 /* clobber */, implicit-def dead early-clobber $r20, 12 /* clobber */, implicit-def dead early-clobber $r21, 12 /* clobber */, implicit-def dead early-clobber $r22, 12 /* clobber */, implicit-def dead early-clobber $r23, 12 /* clobber */, implicit-def dead early-clobber $r24, 12 /* clobber */, implicit-def dead early-clobber $r25, 12 /* clobber */, implicit-def dead early-clobber $r26, 12 /* clobber */, implicit-def dead early-clobber $r27, 12 /* clobber */, implicit-def dead early-clobber $r28, 12 /* clobber */, implicit-def dead early-clobber $r29, 12 /* clobber */, implicit-def dead early-clobber $r30, 12 /* clobber */, implicit-def dead early-clobber $r31
; CHECK-NEXT: BDNZ8 %bb.4, implicit-def dead $ctr8, implicit $ctr8
+ ; CHECK-NEXT: B %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $r4
+ ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: B %bb.3
bb.0.entry:
successors: %bb.2(0x50000000), %bb.1(0x30000000)
diff --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
index 806c495fa6777..0aa04f40f6a52 100644
--- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll
+++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
@@ -11,21 +11,22 @@
define void @quux(i32 signext %arg, i32 signext %arg1) nounwind {
; RV64I-LABEL: quux:
; RV64I: # %bb.0: # %bb
+; RV64I-NEXT: beq a0, a1, .LBB0_4
+; RV64I-NEXT: # %bb.1: # %bb2.preheader
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: beq a0, a1, .LBB0_3
-; RV64I-NEXT: # %bb.1: # %bb2.preheader
; RV64I-NEXT: subw s0, a1, a0
; RV64I-NEXT: .LBB0_2: # %bb2
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: call hoge@plt
; RV64I-NEXT: addiw s0, s0, -1
; RV64I-NEXT: bnez s0, .LBB0_2
-; RV64I-NEXT: .LBB0_3: # %bb6
+; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: .LBB0_4: # %bb6
; RV64I-NEXT: ret
bb:
%tmp = icmp eq i32 %arg, %arg1
diff --git a/llvm/test/CodeGen/RISCV/fli-licm.ll b/llvm/test/CodeGen/RISCV/fli-licm.ll
index 93bb934c1cb0d..f37ace801b159 100644
--- a/llvm/test/CodeGen/RISCV/fli-licm.ll
+++ b/llvm/test/CodeGen/RISCV/fli-licm.ll
@@ -12,11 +12,11 @@
define void @process_nodes(ptr %0) nounwind {
; RV32-LABEL: process_nodes:
; RV32: # %bb.0: # %entry
+; RV32-NEXT: beqz a0, .LBB0_4
+; RV32-NEXT: # %bb.1: # %loop.preheader
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT: beqz a0, .LBB0_3
-; RV32-NEXT: # %bb.1: # %loop.preheader
; RV32-NEXT: mv s0, a0
; RV32-NEXT: .LBB0_2: # %loop
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
@@ -25,19 +25,20 @@ define void @process_nodes(ptr %0) nounwind {
; RV32-NEXT: call do_it@plt
; RV32-NEXT: lw s0, 0(s0)
; RV32-NEXT: bnez s0, .LBB0_2
-; RV32-NEXT: .LBB0_3: # %exit
+; RV32-NEXT: # %bb.3:
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .LBB0_4: # %exit
; RV32-NEXT: ret
;
; RV64-LABEL: process_nodes:
; RV64: # %bb.0: # %entry
+; RV64-NEXT: beqz a0, .LBB0_4
+; RV64-NEXT: # %bb.1: # %loop.preheader
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64-NEXT: beqz a0, .LBB0_3
-; RV64-NEXT: # %bb.1: # %loop.preheader
; RV64-NEXT: mv s0, a0
; RV64-NEXT: .LBB0_2: # %loop
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
@@ -46,10 +47,11 @@ define void @process_nodes(ptr %0) nounwind {
; RV64-NEXT: call do_it@plt
; RV64-NEXT: ld s0, 0(s0)
; RV64-NEXT: bnez s0, .LBB0_2
-; RV64-NEXT: .LBB0_3: # %exit
+; RV64-NEXT: # %bb.3:
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: .LBB0_4: # %exit
; RV64-NEXT: ret
entry:
%1 = icmp eq ptr %0, null
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
index d67e66d7a7131..421b5b5364d35 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
@@ -4,11 +4,13 @@
define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB0_1: @ %for.body.preheader
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB0_4
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: mov lr, r0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: .LBB0_2: @ %for.body
@@ -21,10 +23,7 @@ define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: add r0, r3
; CHECK-NEXT: bne .LBB0_2
-; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp9 = icmp sgt i32 %n, 0
@@ -51,11 +50,13 @@ for.body: ; preds = %entry, %for.body
define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: testlr:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB1_1: @ %for.body.preheader
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB1_4
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: mov r3, r0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: .LBB1_2: @ %for.body
@@ -68,10 +69,7 @@ define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n)
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: add r0, r4
; CHECK-NEXT: bne .LBB1_2
-; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, pc}
-; CHECK-NEXT: .LBB1_4:
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: pop {r4, pc}
entry:
%cmp9 = icmp sgt i32 %n, 0
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
index 99d169e63e5a5..59b32a3f441c1 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
@@ -4,11 +4,12 @@
define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
; CHECK-LABEL: test_memcpy:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB0_1: @ %for.body.preheader
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB0_5
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: lsl.w r12, r3, #2
; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: b .LBB0_2
@@ -31,8 +32,9 @@ define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i3
; CHECK-NEXT: vstrb.8 q0, [r5], #16
; CHECK-NEXT: letp lr, .LBB0_4
; CHECK-NEXT: b .LBB0_3
-; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: .LBB0_5:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp8 = icmp sgt i32 %n, 0
br i1 %cmp8, label %for.body, label %for.cond.cleanup
@@ -55,12 +57,12 @@ for.body: ; preds = %entry, %for.body
define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) {
; CHECK-LABEL: test_memset:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: cmp r1, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r7, pc}
+; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB1_1:
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_2: @ %for.body
@@ -80,8 +82,9 @@ define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) {
; CHECK-NEXT: vstrb.8 q0, [r12], #16
; CHECK-NEXT: letp lr, .LBB1_4
; CHECK-NEXT: b .LBB1_3
-; CHECK-NEXT: .LBB1_5: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: .LBB1_5:
+; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp5 = icmp sgt i32 %n, 0
br i1 %cmp5, label %for.body, label %for.cond.cleanup
@@ -102,13 +105,14 @@ for.body: ; preds = %entry, %for.body
define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
; CHECK-LABEL: test_memmove:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB2_1: @ %for.body.preheader
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB2_3
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: mov r8, r3
; CHECK-NEXT: mov r5, r2
; CHECK-NEXT: mov r9, r1
@@ -124,9 +128,10 @@ define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i
; CHECK-NEXT: add r6, r4
; CHECK-NEXT: subs r5, #1
; CHECK-NEXT: bne .LBB2_2
-; CHECK-NEXT: .LBB2_3: @ %for.cond.cleanup
+; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp8 = icmp sgt i32 %n, 0
br i1 %cmp8, label %for.body, label %for.cond.cleanup
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
index 13e39a8f16e33..23eb5900bb7d1 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
@@ -4,10 +4,11 @@
define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
; CHECK-LABEL: float_float_mul:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq .LBB0_10
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB0_1: @ %for.body.preheader
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhi .LBB0_3
; CHECK-NEXT: @ %bb.2:
@@ -80,8 +81,9 @@ define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr noca
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r5, #12]
; CHECK-NEXT: bne .LBB0_9
-; CHECK-NEXT: .LBB0_10: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: .LBB0_10:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB0_11: @ %vector.ph
; CHECK-NEXT: bic r12, r3, #3
; CHECK-NEXT: movs r6, #1
@@ -215,10 +217,11 @@ for.body: ; preds = %for.body.prol.loope
define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
; CHECK-LABEL: float_float_add:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq .LBB1_10
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB1_1: @ %for.body.preheader
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhi .LBB1_3
; CHECK-NEXT: @ %bb.2:
@@ -291,8 +294,9 @@ define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr noca
; CHECK-NEXT: vadd.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r5, #12]
; CHECK-NEXT: bne .LBB1_9
-; CHECK-NEXT: .LBB1_10: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: .LBB1_10:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB1_11: @ %vector.ph
; CHECK-NEXT: bic r12, r3, #3
; CHECK-NEXT: movs r6, #1
@@ -426,10 +430,11 @@ for.body: ; preds = %for.body.prol.loope
define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
; CHECK-LABEL: float_float_sub:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq .LBB2_10
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB2_1: @ %for.body.preheader
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhi .LBB2_3
; CHECK-NEXT: @ %bb.2:
@@ -502,8 +507,9 @@ define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr noca
; CHECK-NEXT: vsub.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r5, #12]
; CHECK-NEXT: bne .LBB2_9
-; CHECK-NEXT: .LBB2_10: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: .LBB2_10:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB2_11: @ %vector.ph
; CHECK-NEXT: bic r12, r3, #3
; CHECK-NEXT: movs r6, #1
@@ -637,10 +643,11 @@ for.body: ; preds = %for.body.prol.loope
define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
; CHECK-LABEL: float_int_mul:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq.w .LBB3_13
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB3_1: @ %for.body.preheader
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bls .LBB3_6
; CHECK-NEXT: @ %bb.2: @ %vector.memcheck
@@ -729,8 +736,9 @@ define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapt
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r6, #12]
; CHECK-NEXT: bne .LBB3_12
-; CHECK-NEXT: .LBB3_13: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: .LBB3_13:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp8 = icmp eq i32 %N, 0
br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
index eb98b85eafc90..93119eac2d564 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
@@ -411,10 +411,12 @@ for.cond.cleanup: ; preds = %middle.block, %entr
define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
; CHECK-LABEL: two_loops_mul_add_v4i32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: beq .LBB6_8
-; CHECK-NEXT: @ %bb.1: @ %vector.ph
+; CHECK-NEXT: itt eq
+; CHECK-NEXT: moveq r0, #0
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB6_1: @ %vector.ph
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: bic r3, r3, #3
@@ -461,12 +463,10 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read
; CHECK-NEXT: @ %bb.6: @ %middle.block44
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u32 r12, q0
-; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup7
+; CHECK-NEXT: .LBB6_7:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
; CHECK-NEXT: mov r0, r12
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
-; CHECK-NEXT: .LBB6_8:
-; CHECK-NEXT: movs r0, #0
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: bx lr
entry:
%cmp35 = icmp eq i32 %N, 0
br i1 %cmp35, label %for.cond.cleanup7, label %vector.ph
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
index caf7a339805fc..1f3a43923db61 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
@@ -4,10 +4,11 @@
define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noalias nocapture %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #1
-; CHECK-NEXT: blt .LBB0_7
-; CHECK-NEXT: @ %bb.1: @ %for.cond1.preheader.us.preheader
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB0_1: @ %for.cond1.preheader.us.preheader
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: mov r8, r3
; CHECK-NEXT: lsl.w r12, r3, #1
; CHECK-NEXT: movs r3, #0
@@ -47,8 +48,9 @@ define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noali
; CHECK-NEXT: add r4, r12
; CHECK-NEXT: cmp r3, r8
; CHECK-NEXT: bne .LBB0_2
-; CHECK-NEXT: .LBB0_7: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: @ %bb.7:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp252 = icmp sgt i32 %n, 0
br i1 %cmp252, label %for.cond1.preheader.us, label %for.cond.cleanup
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
index 9ef5a46edf934..be1f1de71be3d 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
@@ -5,17 +5,19 @@
define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) {
; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: ldrsh.w r12, [r2, #2]
+; CHECK-NEXT: cmp.w r12, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB0_1: @ %for.cond3.preheader.lr.ph
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: sub sp, #64
-; CHECK-NEXT: ldrsh.w r12, [r2, #2]
-; CHECK-NEXT: cmp.w r12, #1
-; CHECK-NEXT: itt ge
-; CHECK-NEXT: ldrshge.w r7, [r2]
-; CHECK-NEXT: cmpge r7, #1
-; CHECK-NEXT: blt.w .LBB0_5
-; CHECK-NEXT: @ %bb.1: @ %for.cond3.preheader.us.preheader
+; CHECK-NEXT: ldrsh.w r7, [r2]
+; CHECK-NEXT: cmp r7, #1
+; CHECK-NEXT: blt.w .LBB0_6
+; CHECK-NEXT: @ %bb.2: @ %for.cond3.preheader.us.preheader
; CHECK-NEXT: movs r2, #252
; CHECK-NEXT: ldr r4, [sp, #152]
; CHECK-NEXT: and.w r6, r2, r3, lsr #3
@@ -46,14 +48,14 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT: vstrw.32 q2, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vstrw.32 q3, [sp, #16] @ 16-byte Spill
-; CHECK-NEXT: .LBB0_2: @ %vector.ph
+; CHECK-NEXT: .LBB0_3: @ %vector.ph
; CHECK-NEXT: @ =>This Loop Header: Depth=1
-; CHECK-NEXT: @ Child Loop BB0_3 Depth 2
+; CHECK-NEXT: @ Child Loop BB0_4 Depth 2
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r6, r7
; CHECK-NEXT: dls lr, r3
-; CHECK-NEXT: .LBB0_3: @ %vector.body
-; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
+; CHECK-NEXT: .LBB0_4: @ %vector.body
+; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vctp.16 r6
; CHECK-NEXT: subs r6, #8
@@ -89,18 +91,19 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.16 q0, [r5], #16
-; CHECK-NEXT: le lr, .LBB0_3
-; CHECK-NEXT: @ %bb.4: @ %for.cond3.for.cond.cleanup7_crit_edge.us
-; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: le lr, .LBB0_4
+; CHECK-NEXT: @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us
+; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: adds r4, #1
; CHECK-NEXT: add.w r0, r0, r1, lsl #1
; CHECK-NEXT: cmp r4, r12
-; CHECK-NEXT: bne .LBB0_2
-; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup
+; CHECK-NEXT: bne .LBB0_3
+; CHECK-NEXT: .LBB0_6:
; CHECK-NEXT: add sp, #64
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT: bx lr
entry:
%iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1
%0 = load i16, ptr %iHeight, align 2
@@ -184,18 +187,19 @@ for.cond.cleanup: ; preds = %for.cond3.for.cond.
define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) "target-cpu"="cortex-m55" {
; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha_sched:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: sub sp, #80
; CHECK-NEXT: ldrsh.w r12, [r2, #2]
; CHECK-NEXT: cmp.w r12, #1
-; CHECK-NEXT: blt.w .LBB1_6
+; CHECK-NEXT: blt.w .LBB1_7
; CHECK-NEXT: @ %bb.1: @ %for.cond3.preheader.lr.ph
; CHECK-NEXT: ldrsh.w r2, [r2]
; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB1_6
-; CHECK-NEXT: @ %bb.2: @ %for.cond3.preheader.us.preheader
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB1_2: @ %for.cond3.preheader.us.preheader
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: sub sp, #80
; CHECK-NEXT: ldr r7, [sp, #168]
; CHECK-NEXT: movs r5, #120
; CHECK-NEXT: lsls r6, r3, #3
@@ -265,11 +269,13 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias noc
; CHECK-NEXT: adds r4, #1
; CHECK-NEXT: cmp r4, r12
; CHECK-NEXT: bne .LBB1_3
-; CHECK-NEXT: .LBB1_6: @ %for.cond.cleanup
+; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: add sp, #80
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .LBB1_7: @ %for.cond.cleanup
+; CHECK-NEXT: bx lr
entry:
%iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1
%0 = load i16, ptr %iHeight, align 2
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
index fc58873f9857b..3b42ee36e7c2e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
@@ -53,10 +53,12 @@ if.end: ; preds = %do.body, %entry
define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr nocapture %z, i32 %m, i32 %n) {
; CHECK-LABEL: nested:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB1_1: @ %for.body.preheader
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: cbz r3, .LBB1_8
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: ldr.w r12, [sp, #24]
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: b .LBB1_4
@@ -91,8 +93,9 @@ define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr no
; CHECK-NEXT: sub.w r12, r12, r5
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: b .LBB1_3
-; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: .LBB1_8:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp20.not = icmp eq i32 %m, 0
br i1 %cmp20.not, label %for.cond.cleanup, label %for.body
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index 6228d616b5842..b7b19a477ab0f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -981,6 +981,13 @@ if.end61: ; preds = %if.then59, %while.e
define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) {
; CHECK-LABEL: fir:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #8
+; CHECK-NEXT: blo.w .LBB16_13
+; CHECK-NEXT: @ %bb.1: @ %if.then
+; CHECK-NEXT: lsrs.w r12, r3, #2
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bxeq lr
+; CHECK-NEXT: .LBB16_2: @ %while.body.lr.ph
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
@@ -989,12 +996,6 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: .pad #32
; CHECK-NEXT: sub sp, #32
-; CHECK-NEXT: cmp r3, #8
-; CHECK-NEXT: blo.w .LBB16_12
-; CHECK-NEXT: @ %bb.1: @ %if.then
-; CHECK-NEXT: lsrs.w r12, r3, #2
-; CHECK-NEXT: beq.w .LBB16_12
-; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph
; CHECK-NEXT: ldrh r6, [r0]
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: ldrd r4, r10, [r0, #4]
@@ -1106,11 +1107,13 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: add.w r4, r4, r0, lsl #2
; CHECK-NEXT: b .LBB16_4
-; CHECK-NEXT: .LBB16_12: @ %if.end
+; CHECK-NEXT: .LBB16_12:
; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .LBB16_13: @ %if.end
+; CHECK-NEXT: bx lr
entry:
%pState1 = getelementptr inbounds %struct.arm_fir_instance_f32, ptr %S, i32 0, i32 1
%i = load ptr, ptr %pState1, align 4
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
index 24f1831a3f07c..0335d24c0a782 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
@@ -290,12 +290,12 @@ end:
define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: gather_inc_v4i32_simple:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r4, pc}
+; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB8_1: @ %vector.ph.preheader
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: bic r12, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: sub.w lr, r12, #4
@@ -319,8 +319,9 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture reado
; CHECK-NEXT: @ in Loop: Header=BB8_2 Depth=1
; CHECK-NEXT: cmp r12, r2
; CHECK-NEXT: bne .LBB8_2
-; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: @ %bb.5:
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: .LCPI8_0:
@@ -359,13 +360,14 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: gather_inc_v4i32_complex:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB9_1: @ %vector.ph.preheader
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: blt .LBB9_5
-; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader
; CHECK-NEXT: bic r12, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: sub.w lr, r12, #4
@@ -401,9 +403,10 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture read
; CHECK-NEXT: @ in Loop: Header=BB9_2 Depth=1
; CHECK-NEXT: cmp r12, r2
; CHECK-NEXT: bne .LBB9_2
-; CHECK-NEXT: .LBB9_5: @ %for.cond.cleanup
+; CHECK-NEXT: @ %bb.5:
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r7, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: .LCPI9_0:
@@ -461,12 +464,12 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: gather_inc_v4i32_large:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r4, pc}
+; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB10_1: @ %vector.ph.preheader
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: bic r12, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: sub.w lr, r12, #4
@@ -490,8 +493,9 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readon
; CHECK-NEXT: @ in Loop: Header=BB10_2 Depth=1
; CHECK-NEXT: cmp r12, r2
; CHECK-NEXT: bne .LBB10_2
-; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: @ %bb.5:
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: .LCPI10_0:
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
index 9093b9af00656..ea186cd6ed2d4 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
@@ -4,12 +4,12 @@
define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: gather_inc_v4i32_simple:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r4, pc}
+; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB0_1: @ %vector.ph.preheader
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: bic r12, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: sub.w lr, r12, #4
@@ -33,8 +33,9 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture reado
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: cmp r12, r2
; CHECK-NEXT: bne .LBB0_2
-; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: @ %bb.5:
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: .LCPI0_0:
diff --git a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
index 5f3a12711dc0f..da59cb259db61 100644
--- a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
@@ -211,12 +211,12 @@ entry:
define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) {
; CHECK-LABEL: test11:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: it gt
-; CHECK-NEXT: popgt {r4, pc}
+; CHECK-NEXT: bxgt lr
; CHECK-NEXT: .LBB10_1: @ %prehead
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov r12, r1
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: wlstp.8 lr, r2, .LBB10_3
@@ -230,8 +230,9 @@ define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) {
; CHECK-NEXT: subs r2, #2
; CHECK-NEXT: strb r3, [r1], #1
; CHECK-NEXT: bne .LBB10_3
-; CHECK-NEXT: @ %bb.4: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: @ %bb.4:
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp6 = icmp slt i32 %n, 0
br i1 %cmp6, label %prehead, label %for.cond.cleanup
@@ -440,12 +441,12 @@ declare void @other()
define void @multilooped_exit(i32 %b) {
; CHECK-LABEL: multilooped_exit:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r0, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r4, pc}
+; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB18_1: @ %loop.preheader
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov.w r4, #-1
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: b .LBB18_3
@@ -498,8 +499,9 @@ define void @multilooped_exit(i32 %b) {
; CHECK-NEXT: vstrb.8 q0, [r3], #16
; CHECK-NEXT: letp lr, .LBB18_11
; CHECK-NEXT: b .LBB18_2
-; CHECK-NEXT: .LBB18_12: @ %exit
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: .LBB18_12:
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
entry:
%cmp8 = icmp sgt i32 %b, 0
br i1 %cmp8, label %loop, label %exit
diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll
index 7e059ae726fc6..45bb70ec44b73 100644
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll
@@ -6,13 +6,14 @@
define void @DCT_mve1(ptr nocapture readonly %S, ptr nocapture readonly %pIn, ptr nocapture %pOut) {
; CHECK-LABEL: DCT_mve1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: ldr r3, [r0, #4]
; CHECK-NEXT: sub.w r12, r3, #1
; CHECK-NEXT: cmp.w r12, #2
-; CHECK-NEXT: blo .LBB0_5
-; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT: it lo
+; CHECK-NEXT: bxlo lr
+; CHECK-NEXT: .LBB0_1: @ %for.body.preheader
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: ldr r5, [r0, #8]
; CHECK-NEXT: ldr r3, [r0]
; CHECK-NEXT: add.w r3, r3, r5, lsl #2
@@ -43,8 +44,9 @@ define void @DCT_mve1(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: vstr s0, [r7]
; CHECK-NEXT: bne .LBB0_2
-; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT: @ %bb.5:
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: bx lr
entry:
%NumInputs = getelementptr inbounds %struct.DCT_InstanceTypeDef, ptr %S, i32 0, i32 2
%i = load i32, ptr %NumInputs, align 4
diff --git a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
index 94397f0ae587b..3a14e650bd53a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
@@ -127,15 +127,16 @@ define arm_aapcs_vfpcc void @scatter_inc_mini_16i8(<16 x i8> %data, ptr %dst, <1
define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i32> %data2, <4 x i32> %data3, ptr %dst, i32 %n) {
; CHECK-LABEL: scatter_inc_v4i32_complex:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r1, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB3_1: @ %vector.ph.preheader
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: cmp r1, #1
-; CHECK-NEXT: blt .LBB3_5
-; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader
; CHECK-NEXT: adr r4, .LCPI3_2
; CHECK-NEXT: bic r2, r1, #3
; CHECK-NEXT: vldrw.u32 q3, [r4]
@@ -168,10 +169,11 @@ define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i3
; CHECK-NEXT: @ in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: cmp r2, r1
; CHECK-NEXT: bne .LBB3_2
-; CHECK-NEXT: .LBB3_5: @ %for.cond.cleanup
+; CHECK-NEXT: @ %bb.5:
; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: .LCPI3_0:
diff --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
index 85425db1eb6c8..42a00b61b4183 100644
--- a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
@@ -58,11 +58,12 @@ for.cond.cleanup: ; preds = %vector.body, %entry
define arm_aapcs_vfpcc void @start11(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr noalias nocapture %z, float %a, i32 %n) {
; CHECK-LABEL: start11:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB1_1: @ %vector.ph
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: cmp r3, #1
-; CHECK-NEXT: blt .LBB1_3
-; CHECK-NEXT: @ %bb.1: @ %vector.ph
; CHECK-NEXT: vmov r12, s0
; CHECK-NEXT: adds r4, r3, #3
; CHECK-NEXT: adr r5, .LCPI1_0
@@ -85,8 +86,9 @@ define arm_aapcs_vfpcc void @start11(ptr nocapture readonly %x, ptr nocapture re
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q3, [r2], #16
; CHECK-NEXT: bne .LBB1_2
-; CHECK-NEXT: .LBB1_3: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: @ %bb.3:
+; CHECK-NEXT: pop.w {r4, r5, r7, lr}
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.4:
; CHECK-NEXT: .LCPI1_0:
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
index da0cd57d86dbb..0a26d9920981b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
@@ -4,11 +4,13 @@
define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, ptr nocapture %z, i32 %n) {
; CHECK-LABEL: test32:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r3, #1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK-NEXT: .LBB0_1: @ %vector.body.preheader
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: cmp r3, #1
-; CHECK-NEXT: blt .LBB0_2
-; CHECK-NEXT: .LBB0_1: @ %vector.body
+; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vldrw.u32 q1, [r1], #16
@@ -26,9 +28,10 @@ define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noali
; CHECK-NEXT: lsrl r4, r5, #31
; CHECK-NEXT: vmov q2[3], q2[1], r4, r12
; CHECK-NEXT: vstrb.8 q2, [r2], #16
-; CHECK-NEXT: bne .LBB0_1
-; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: bne .LBB0_2
+; CHECK-NEXT: @ %bb.3:
+; CHECK-NEXT: pop.w {r4, r5, r7, lr}
+; CHECK-NEXT: bx lr
entry:
%0 = and i32 %n, 3
%cmp = icmp eq i32 %0, 0
diff --git a/llvm/test/CodeGen/X86/fold-call-3.ll b/llvm/test/CodeGen/X86/fold-call-3.ll
index 9c9a50d3e9ce1..691f46b9eeb0e 100644
--- a/llvm/test/CodeGen/X86/fold-call-3.ll
+++ b/llvm/test/CodeGen/X86/fold-call-3.ll
@@ -13,12 +13,12 @@
define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(ptr %Val, ptr %Actions) nounwind {
; CHECK-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE:
; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: cmpl $0, _NumTrials(%rip)
+; CHECK-NEXT: je LBB0_4
+; CHECK-NEXT: ## %bb.1: ## %bb.nph
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: cmpl $0, _NumTrials(%rip)
-; CHECK-NEXT: je LBB0_3
-; CHECK-NEXT: ## %bb.1: ## %bb.nph
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: xorl %ebp, %ebp
@@ -34,20 +34,21 @@ define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(ptr %Val, ptr %Acti
; CHECK-NEXT: incl %ebp
; CHECK-NEXT: cmpl _NumTrials(%rip), %ebp
; CHECK-NEXT: jb LBB0_2
-; CHECK-NEXT: LBB0_3: ## %return
+; CHECK-NEXT: ## %bb.3:
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: LBB0_4: ## %return
; CHECK-NEXT: retq
;
; pre-RA-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE:
; pre-RA: ## %bb.0: ## %entry
+; pre-RA-NEXT: cmpl $0, _NumTrials(%rip)
+; pre-RA-NEXT: je LBB0_4
+; pre-RA-NEXT: ## %bb.1: ## %bb.nph
; pre-RA-NEXT: pushq %rbp
; pre-RA-NEXT: pushq %rbx
; pre-RA-NEXT: subq $24, %rsp
-; pre-RA-NEXT: cmpl $0, _NumTrials(%rip)
-; pre-RA-NEXT: je LBB0_3
-; pre-RA-NEXT: ## %bb.1: ## %bb.nph
; pre-RA-NEXT: movq %rsi, %rbx
; pre-RA-NEXT: movq %rdi, %rax
; pre-RA-NEXT: xorl %ebp, %ebp
@@ -63,10 +64,11 @@ define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(ptr %Val, ptr %Acti
; pre-RA-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; pre-RA-NEXT: cmpl _NumTrials(%rip), %ebp
; pre-RA-NEXT: jb LBB0_2
-; pre-RA-NEXT: LBB0_3: ## %return
+; pre-RA-NEXT: ## %bb.3:
; pre-RA-NEXT: addq $24, %rsp
; pre-RA-NEXT: popq %rbx
; pre-RA-NEXT: popq %rbp
+; pre-RA-NEXT: LBB0_4: ## %return
; pre-RA-NEXT: retq
entry:
%i = alloca %"struct.clang::ActionBase::ActionResult<0u>", align 8
diff --git a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
index e21d4de178719..d0d46b5f11836 100644
--- a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
+++ b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
@@ -9,12 +9,14 @@
define void @foo(i32 %N) nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: js .LBB0_1
+; CHECK-NEXT: # %bb.4: # %return
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_1: # %bb.preheader
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: jns .LBB0_3
-; CHECK-NEXT: # %bb.1: # %bb.preheader
; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: xorl %ebp, %ebp
; CHECK-NEXT: .p2align 4, 0x90
@@ -26,7 +28,7 @@ define void @foo(i32 %N) nounwind {
; CHECK-NEXT: decl %ebp
; CHECK-NEXT: cmpl %ebp, %ebx
; CHECK-NEXT: jne .LBB0_2
-; CHECK-NEXT: .LBB0_3: # %return
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %rbp
diff --git a/llvm/test/CodeGen/X86/pr44412.ll b/llvm/test/CodeGen/X86/pr44412.ll
index 6c33666fb5c3a..67579a5bb7c52 100644
--- a/llvm/test/CodeGen/X86/pr44412.ll
+++ b/llvm/test/CodeGen/X86/pr44412.ll
@@ -4,10 +4,10 @@
define void @bar(i32 %0, i32 %1) nounwind {
; CHECK-LABEL: bar:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.1: # %.preheader
+; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: decl %ebx
; CHECK-NEXT: .p2align 4, 0x90
@@ -16,8 +16,9 @@ define void @bar(i32 %0, i32 %1) nounwind {
; CHECK-NEXT: callq foo@PLT
; CHECK-NEXT: addl $-1, %ebx
; CHECK-NEXT: jb .LBB0_2
-; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: retq
%3 = icmp eq i32 %0, 0
br i1 %3, label %8, label %4
@@ -36,10 +37,10 @@ define void @bar(i32 %0, i32 %1) nounwind {
define void @baz(i32 %0, i32 %1) nounwind {
; CHECK-LABEL: baz:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB1_3
+; CHECK-NEXT: je .LBB1_4
; CHECK-NEXT: # %bb.1: # %.preheader
+; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: decl %ebx
; CHECK-NEXT: .p2align 4, 0x90
@@ -48,8 +49,9 @@ define void @baz(i32 %0, i32 %1) nounwind {
; CHECK-NEXT: callq foo@PLT
; CHECK-NEXT: addl $-1, %ebx
; CHECK-NEXT: jae .LBB1_2
-; CHECK-NEXT: .LBB1_3:
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: retq
%3 = icmp eq i32 %0, 0
br i1 %3, label %8, label %4
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index ec4a12eadb94e..f22ea739092f6 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -639,40 +639,40 @@ declare hidden fastcc ptr @find_temp_slot_from_address(ptr readonly)
define void @useLEA(ptr readonly %x) {
; ENABLE-LABEL: useLEA:
; ENABLE: ## %bb.0: ## %entry
-; ENABLE-NEXT: pushq %rax
-; ENABLE-NEXT: .cfi_def_cfa_offset 16
; ENABLE-NEXT: testq %rdi, %rdi
-; ENABLE-NEXT: je LBB8_7
+; ENABLE-NEXT: je LBB8_9
; ENABLE-NEXT: ## %bb.1: ## %if.end
; ENABLE-NEXT: cmpw $66, (%rdi)
-; ENABLE-NEXT: jne LBB8_7
+; ENABLE-NEXT: jne LBB8_9
; ENABLE-NEXT: ## %bb.2: ## %lor.lhs.false
+; ENABLE-NEXT: pushq %rax
+; ENABLE-NEXT: .cfi_def_cfa_offset 16
; ENABLE-NEXT: movq 8(%rdi), %rdi
; ENABLE-NEXT: movzwl (%rdi), %eax
; ENABLE-NEXT: leal -54(%rax), %ecx
; ENABLE-NEXT: cmpl $14, %ecx
; ENABLE-NEXT: ja LBB8_3
-; ENABLE-NEXT: ## %bb.8: ## %lor.lhs.false
+; ENABLE-NEXT: ## %bb.7: ## %lor.lhs.false
; ENABLE-NEXT: movl $24599, %edx ## imm = 0x6017
; ENABLE-NEXT: btl %ecx, %edx
; ENABLE-NEXT: jae LBB8_3
-; ENABLE-NEXT: LBB8_7: ## %cleanup
-; ENABLE-NEXT: popq %rax
+; ENABLE-NEXT: LBB8_8:
+; ENABLE-NEXT: addq $8, %rsp
+; ENABLE-NEXT: LBB8_9: ## %cleanup
; ENABLE-NEXT: retq
; ENABLE-NEXT: LBB8_3: ## %lor.lhs.false
; ENABLE-NEXT: cmpl $134, %eax
-; ENABLE-NEXT: je LBB8_7
+; ENABLE-NEXT: je LBB8_8
; ENABLE-NEXT: ## %bb.4: ## %lor.lhs.false
; ENABLE-NEXT: cmpl $140, %eax
-; ENABLE-NEXT: je LBB8_7
+; ENABLE-NEXT: je LBB8_8
; ENABLE-NEXT: ## %bb.5: ## %if.end.55
; ENABLE-NEXT: callq _find_temp_slot_from_address
; ENABLE-NEXT: testq %rax, %rax
-; ENABLE-NEXT: je LBB8_7
+; ENABLE-NEXT: je LBB8_8
; ENABLE-NEXT: ## %bb.6: ## %if.then.60
; ENABLE-NEXT: movb $1, 57(%rax)
-; ENABLE-NEXT: popq %rax
-; ENABLE-NEXT: retq
+; ENABLE-NEXT: jmp LBB8_8
;
; DISABLE-LABEL: useLEA:
; DISABLE: ## %bb.0: ## %entry
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
index 2069e974c6905..536f9912f1b6f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
@@ -23,7 +23,7 @@ define i32 @test(i32 %c, ptr %a, ptr %b) {
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_5:
-; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
entry:
%cmp13 = icmp sgt i32 %c, 0
@@ -62,7 +62,7 @@ define i64 @IVIncHoist_not_all_user_in_header(i32 %c, ptr %a, ptr %b) {
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: add x10, x1, #4
; CHECK-NEXT: add x11, x2, #8
-; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: .LBB1_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr w12, [x10, x8, lsl #2]
@@ -142,7 +142,7 @@ define i32 @negative_test_type_is_struct(i32 %c, ptr %a, ptr %b) {
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_5:
-; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
entry:
%cmp13 = icmp sgt i32 %c, 0
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index fa1c208ffbd77..63a3c725ae89e 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -182,12 +182,12 @@ exit:
define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
; X64-LABEL: extrastride:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rbx
; X64-NEXT: # kill: def $ecx killed $ecx def $rcx
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: testl %r9d, %r9d
-; X64-NEXT: je .LBB2_3
+; X64-NEXT: je .LBB2_4
; X64-NEXT: # %bb.1: # %for.body.lr.ph
+; X64-NEXT: pushq %rbx
; X64-NEXT: leal (%rsi,%rsi), %r10d
; X64-NEXT: leal (%rsi,%rsi,2), %r11d
; X64-NEXT: addl %esi, %ecx
@@ -213,8 +213,9 @@ define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %
; X64-NEXT: addq %r8, %rdx
; X64-NEXT: decl %r9d
; X64-NEXT: jne .LBB2_2
-; X64-NEXT: .LBB2_3: # %for.end
+; X64-NEXT: # %bb.3:
; X64-NEXT: popq %rbx
+; X64-NEXT: .LBB2_4: # %for.end
; X64-NEXT: retq
;
; X32-LABEL: extrastride:
More information about the llvm-commits
mailing list