[llvm] [ARM] Port AArch64's CSel handling patterns to ARM (PR #152945)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 10 15:38:28 PDT 2025
https://github.com/AZero13 created https://github.com/llvm/llvm-project/pull/152945
None
>From 47088832dd665eed20f9cd6eb5fe480e8561646b Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sun, 10 Aug 2025 18:38:08 -0400
Subject: [PATCH] [ARM] Port AArch64's CSel handling patterns to ARM
---
llvm/lib/Target/ARM/ARMInstrThumb2.td | 9 +
llvm/test/CodeGen/ARM/fpclamptosat.ll | 132 +-
llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll | 471 ++-
llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll | 381 ++-
llvm/test/CodeGen/Thumb/scmp.ll | 12 +-
llvm/test/CodeGen/Thumb/ucmp.ll | 12 +-
.../Thumb2/LowOverheadLoops/reductions.ll | 32 +-
.../test/CodeGen/Thumb2/mve-blockplacement.ll | 24 +-
.../CodeGen/Thumb2/mve-fpclamptosat_vec.ll | 166 +-
.../CodeGen/Thumb2/mve-fptosi-sat-vector.ll | 2959 +++++++++--------
.../CodeGen/Thumb2/mve-fptoui-sat-vector.ll | 2412 +++++++-------
llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll | 2 +-
llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll | 6 +-
llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll | 16 +-
14 files changed, 3417 insertions(+), 3217 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 8f56fb0938dd0..cd26e19378170 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -5765,6 +5765,15 @@ let Predicates = [HasV8_1MMainline] in {
def : T2Pat<(ARMcmov (i32 0), (i32 -1), imm:$cc, CPSR),
(t2CSINV ZR, ZR, (inv_cond_XFORM imm:$cc))>;
+ def : T2Pat<(ARMcmov GPRwithZR:$tval, (i32 1), imm:$cc, CPSR), // cc ? 1 : $tval
+ (t2CSINC GPRwithZR:$tval, ZR, (inv_cond_XFORM imm:$cc))>; // CSINC yields Rn when its cond holds, so invert
+ def : T2Pat<(ARMcmov (i32 1), GPRwithZR:$fval, imm:$cc, CPSR), // cc ? $fval : 1
+ (t2CSINC GPRwithZR:$fval, ZR, imm:$cc)>; // cc ? $fval : ZR+1
+ def : T2Pat<(ARMcmov GPRwithZR:$tval, (i32 -1), imm:$cc, CPSR), // cc ? -1 : $tval
+ (t2CSINV GPRwithZR:$tval, ZR, (inv_cond_XFORM imm:$cc))>; // CSINV yields Rn when its cond holds, so invert
+ def : T2Pat<(ARMcmov (i32 -1), GPRwithZR:$fval, imm:$cc, CPSR), // cc ? $fval : -1
+ (t2CSINV GPRwithZR:$fval, ZR, imm:$cc)>; // cc ? $fval : ~ZR
+
multiclass ModifiedV8_1CSEL<Instruction Insn, dag modvalue> {
def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, imm:$cc, CPSR),
(Insn GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc)>;
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 8ab56b228d2a7..87a1221088e09 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -1039,8 +1039,8 @@ define i64 @stest_f64i64(double %x) {
;
; FULL-LABEL: stest_f64i64:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixdfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -1049,20 +1049,20 @@ define i64 @stest_f64i64(double %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: it ge
; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, lt
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi double %x to i128
%0 = icmp slt i128 %conv, 9223372036854775807
@@ -1295,8 +1295,8 @@ define i64 @stest_f32i64(float %x) {
;
; FULL-LABEL: stest_f32i64:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixsfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -1305,20 +1305,20 @@ define i64 @stest_f32i64(float %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: it ge
; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, lt
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi float %x to i128
%0 = icmp slt i128 %conv, 9223372036854775807
@@ -1556,8 +1556,8 @@ define i64 @stest_f16i64(half %x) {
;
; FULL-LABEL: stest_f16i64:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: vmov.f16 r0, s0
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixhfti
@@ -1568,20 +1568,20 @@ define i64 @stest_f16i64(half %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: it ge
; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, lt
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi half %x to i128
%0 = icmp slt i128 %conv, 9223372036854775807
@@ -2708,8 +2708,8 @@ define i64 @stest_f64i64_mm(double %x) {
;
; FULL-LABEL: stest_f64i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixdfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -2718,21 +2718,21 @@ define i64 @stest_f64i64_mm(double %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi double %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -3021,8 +3021,8 @@ define i64 @stest_f32i64_mm(float %x) {
;
; FULL-LABEL: stest_f32i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixsfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -3031,21 +3031,21 @@ define i64 @stest_f32i64_mm(float %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi float %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -3339,8 +3339,8 @@ define i64 @stest_f16i64_mm(half %x) {
;
; FULL-LABEL: stest_f16i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: vmov.f16 r0, s0
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixhfti
@@ -3351,21 +3351,21 @@ define i64 @stest_f16i64_mm(half %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi half %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
index 5179f97624489..a42a2a8083f6f 100644
--- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
@@ -633,40 +633,74 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI6_2:
; SOFT-NEXT: .long 131071 @ 0x1ffff
;
-; VFP-LABEL: test_signed_i50_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r4, lr}
-; VFP-NEXT: push {r4, lr}
-; VFP-NEXT: mov r4, r0
-; VFP-NEXT: bl __aeabi_f2lz
-; VFP-NEXT: vldr s0, .LCPI6_0
-; VFP-NEXT: vmov s2, r4
-; VFP-NEXT: vldr s4, .LCPI6_1
-; VFP-NEXT: vcmp.f32 s2, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: ittt lt
-; VFP-NEXT: movlt r1, #0
-; VFP-NEXT: movtlt r1, #65534
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: vcmp.f32 s2, s4
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: it gt
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vcmp.f32 s2, s2
-; VFP-NEXT: itt gt
-; VFP-NEXT: movwgt r1, #65535
-; VFP-NEXT: movtgt r1, #1
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt vs
-; VFP-NEXT: movvs r0, #0
-; VFP-NEXT: movvs r1, #0
-; VFP-NEXT: pop {r4, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI6_0:
-; VFP-NEXT: .long 0xd8000000 @ float -5.62949953E+14
-; VFP-NEXT: .LCPI6_1:
-; VFP-NEXT: .long 0x57ffffff @ float 5.6294992E+14
+; VFP2-LABEL: test_signed_i50_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r4, lr}
+; VFP2-NEXT: push {r4, lr}
+; VFP2-NEXT: mov r4, r0
+; VFP2-NEXT: bl __aeabi_f2lz
+; VFP2-NEXT: vldr s0, .LCPI6_0
+; VFP2-NEXT: vmov s2, r4
+; VFP2-NEXT: vldr s4, .LCPI6_1
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: ittt lt
+; VFP2-NEXT: movlt r1, #0
+; VFP2-NEXT: movtlt r1, #65534
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vcmp.f32 s2, s4
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: it gt
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f32 s2, s2
+; VFP2-NEXT: itt gt
+; VFP2-NEXT: movwgt r1, #65535
+; VFP2-NEXT: movtgt r1, #1
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt vs
+; VFP2-NEXT: movvs r0, #0
+; VFP2-NEXT: movvs r1, #0
+; VFP2-NEXT: pop {r4, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI6_0:
+; VFP2-NEXT: .long 0xd8000000 @ float -5.62949953E+14
+; VFP2-NEXT: .LCPI6_1:
+; VFP2-NEXT: .long 0x57ffffff @ float 5.6294992E+14
+;
+; FP16-LABEL: test_signed_i50_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r4, lr}
+; FP16-NEXT: push {r4, lr}
+; FP16-NEXT: mov r4, r0
+; FP16-NEXT: bl __aeabi_f2lz
+; FP16-NEXT: vldr s0, .LCPI6_0
+; FP16-NEXT: vmov s2, r4
+; FP16-NEXT: vldr s4, .LCPI6_1
+; FP16-NEXT: vcmp.f32 s2, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s4
+; FP16-NEXT: ittt lt
+; FP16-NEXT: movlt r1, #0
+; FP16-NEXT: movtlt r1, #65534
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s2
+; FP16-NEXT: itt gt
+; FP16-NEXT: movwgt r1, #65535
+; FP16-NEXT: movtgt r1, #1
+; FP16-NEXT: csinv r0, r0, zr, gt
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itt vs
+; FP16-NEXT: movvs r0, #0
+; FP16-NEXT: movvs r1, #0
+; FP16-NEXT: pop {r4, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI6_0:
+; FP16-NEXT: .long 0xd8000000 @ float -5.62949953E+14
+; FP16-NEXT: .LCPI6_1:
+; FP16-NEXT: .long 0x57ffffff @ float 5.6294992E+14
%x = call i50 @llvm.fptosi.sat.i50.f32(float %f)
ret i50 %x
}
@@ -735,37 +769,69 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI7_1:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
-; VFP-LABEL: test_signed_i64_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r4, lr}
-; VFP-NEXT: push {r4, lr}
-; VFP-NEXT: mov r4, r0
-; VFP-NEXT: bl __aeabi_f2lz
-; VFP-NEXT: vldr s0, .LCPI7_0
-; VFP-NEXT: vmov s2, r4
-; VFP-NEXT: vldr s4, .LCPI7_1
-; VFP-NEXT: vcmp.f32 s2, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt lt
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: movlt.w r1, #-2147483648
-; VFP-NEXT: vcmp.f32 s2, s4
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt gt
-; VFP-NEXT: mvngt r1, #-2147483648
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vcmp.f32 s2, s2
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt vs
-; VFP-NEXT: movvs r0, #0
-; VFP-NEXT: movvs r1, #0
-; VFP-NEXT: pop {r4, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI7_0:
-; VFP-NEXT: .long 0xdf000000 @ float -9.22337203E+18
-; VFP-NEXT: .LCPI7_1:
-; VFP-NEXT: .long 0x5effffff @ float 9.22337149E+18
+; VFP2-LABEL: test_signed_i64_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r4, lr}
+; VFP2-NEXT: push {r4, lr}
+; VFP2-NEXT: mov r4, r0
+; VFP2-NEXT: bl __aeabi_f2lz
+; VFP2-NEXT: vldr s0, .LCPI7_0
+; VFP2-NEXT: vmov s2, r4
+; VFP2-NEXT: vldr s4, .LCPI7_1
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt lt
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: movlt.w r1, #-2147483648
+; VFP2-NEXT: vcmp.f32 s2, s4
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt gt
+; VFP2-NEXT: mvngt r1, #-2147483648
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f32 s2, s2
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt vs
+; VFP2-NEXT: movvs r0, #0
+; VFP2-NEXT: movvs r1, #0
+; VFP2-NEXT: pop {r4, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI7_0:
+; VFP2-NEXT: .long 0xdf000000 @ float -9.22337203E+18
+; VFP2-NEXT: .LCPI7_1:
+; VFP2-NEXT: .long 0x5effffff @ float 9.22337149E+18
+;
+; FP16-LABEL: test_signed_i64_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r4, lr}
+; FP16-NEXT: push {r4, lr}
+; FP16-NEXT: mov r4, r0
+; FP16-NEXT: bl __aeabi_f2lz
+; FP16-NEXT: vldr s0, .LCPI7_0
+; FP16-NEXT: vmov s2, r4
+; FP16-NEXT: vldr s4, .LCPI7_1
+; FP16-NEXT: vcmp.f32 s2, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s4
+; FP16-NEXT: itt lt
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: movlt.w r1, #-2147483648
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it gt
+; FP16-NEXT: mvngt r1, #-2147483648
+; FP16-NEXT: vcmp.f32 s2, s2
+; FP16-NEXT: csinv r0, r0, zr, gt
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itt vs
+; FP16-NEXT: movvs r0, #0
+; FP16-NEXT: movvs r1, #0
+; FP16-NEXT: pop {r4, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI7_0:
+; FP16-NEXT: .long 0xdf000000 @ float -9.22337203E+18
+; FP16-NEXT: .LCPI7_1:
+; FP16-NEXT: .long 0x5effffff @ float 9.22337149E+18
%x = call i64 @llvm.fptosi.sat.i64.f32(float %f)
ret i64 %x
}
@@ -880,43 +946,81 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI8_0:
; SOFT-NEXT: .long 1895825407 @ 0x70ffffff
;
-; VFP-LABEL: test_signed_i100_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r4, lr}
-; VFP-NEXT: push {r4, lr}
-; VFP-NEXT: mov r4, r0
-; VFP-NEXT: bl __fixsfti
-; VFP-NEXT: vldr s0, .LCPI8_0
-; VFP-NEXT: vmov s2, r4
-; VFP-NEXT: vldr s4, .LCPI8_1
-; VFP-NEXT: vcmp.f32 s2, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt lt
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: movlt r1, #0
-; VFP-NEXT: movlt r2, #0
-; VFP-NEXT: mvnlt r3, #7
-; VFP-NEXT: vcmp.f32 s2, s4
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt gt
-; VFP-NEXT: movgt r3, #7
-; VFP-NEXT: movgt.w r2, #-1
-; VFP-NEXT: movgt.w r1, #-1
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vcmp.f32 s2, s2
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt vs
-; VFP-NEXT: movvs r0, #0
-; VFP-NEXT: movvs r1, #0
-; VFP-NEXT: movvs r2, #0
-; VFP-NEXT: movvs r3, #0
-; VFP-NEXT: pop {r4, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI8_0:
-; VFP-NEXT: .long 0xf1000000 @ float -6.338253E+29
-; VFP-NEXT: .LCPI8_1:
-; VFP-NEXT: .long 0x70ffffff @ float 6.33825262E+29
+; VFP2-LABEL: test_signed_i100_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r4, lr}
+; VFP2-NEXT: push {r4, lr}
+; VFP2-NEXT: mov r4, r0
+; VFP2-NEXT: bl __fixsfti
+; VFP2-NEXT: vldr s0, .LCPI8_0
+; VFP2-NEXT: vmov s2, r4
+; VFP2-NEXT: vldr s4, .LCPI8_1
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt lt
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: movlt r1, #0
+; VFP2-NEXT: movlt r2, #0
+; VFP2-NEXT: mvnlt r3, #7
+; VFP2-NEXT: vcmp.f32 s2, s4
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt gt
+; VFP2-NEXT: movgt r3, #7
+; VFP2-NEXT: movgt.w r2, #-1
+; VFP2-NEXT: movgt.w r1, #-1
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f32 s2, s2
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt vs
+; VFP2-NEXT: movvs r0, #0
+; VFP2-NEXT: movvs r1, #0
+; VFP2-NEXT: movvs r2, #0
+; VFP2-NEXT: movvs r3, #0
+; VFP2-NEXT: pop {r4, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI8_0:
+; VFP2-NEXT: .long 0xf1000000 @ float -6.338253E+29
+; VFP2-NEXT: .LCPI8_1:
+; VFP2-NEXT: .long 0x70ffffff @ float 6.33825262E+29
+;
+; FP16-LABEL: test_signed_i100_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r4, lr}
+; FP16-NEXT: push {r4, lr}
+; FP16-NEXT: mov r4, r0
+; FP16-NEXT: bl __fixsfti
+; FP16-NEXT: vldr s0, .LCPI8_0
+; FP16-NEXT: vmov s2, r4
+; FP16-NEXT: vldr s4, .LCPI8_1
+; FP16-NEXT: vcmp.f32 s2, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s4
+; FP16-NEXT: itttt lt
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: movlt r1, #0
+; FP16-NEXT: movlt r2, #0
+; FP16-NEXT: mvnlt r3, #7
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it gt
+; FP16-NEXT: movgt r3, #7
+; FP16-NEXT: vcmp.f32 s2, s2
+; FP16-NEXT: csinv r2, r2, zr, gt
+; FP16-NEXT: csinv r1, r1, zr, gt
+; FP16-NEXT: csinv r0, r0, zr, gt
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itttt vs
+; FP16-NEXT: movvs r0, #0
+; FP16-NEXT: movvs r1, #0
+; FP16-NEXT: movvs r2, #0
+; FP16-NEXT: movvs r3, #0
+; FP16-NEXT: pop {r4, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI8_0:
+; FP16-NEXT: .long 0xf1000000 @ float -6.338253E+29
+; FP16-NEXT: .LCPI8_1:
+; FP16-NEXT: .long 0x70ffffff @ float 6.33825262E+29
%x = call i100 @llvm.fptosi.sat.i100.f32(float %f)
ret i100 %x
}
@@ -1033,43 +1137,81 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI9_1:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
-; VFP-LABEL: test_signed_i128_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r4, lr}
-; VFP-NEXT: push {r4, lr}
-; VFP-NEXT: mov r4, r0
-; VFP-NEXT: bl __fixsfti
-; VFP-NEXT: vldr s0, .LCPI9_0
-; VFP-NEXT: vmov s2, r4
-; VFP-NEXT: vldr s4, .LCPI9_1
-; VFP-NEXT: vcmp.f32 s2, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt lt
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: movlt r1, #0
-; VFP-NEXT: movlt r2, #0
-; VFP-NEXT: movlt.w r3, #-2147483648
-; VFP-NEXT: vcmp.f32 s2, s4
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt gt
-; VFP-NEXT: mvngt r3, #-2147483648
-; VFP-NEXT: movgt.w r2, #-1
-; VFP-NEXT: movgt.w r1, #-1
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vcmp.f32 s2, s2
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt vs
-; VFP-NEXT: movvs r0, #0
-; VFP-NEXT: movvs r1, #0
-; VFP-NEXT: movvs r2, #0
-; VFP-NEXT: movvs r3, #0
-; VFP-NEXT: pop {r4, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI9_0:
-; VFP-NEXT: .long 0xff000000 @ float -1.70141183E+38
-; VFP-NEXT: .LCPI9_1:
-; VFP-NEXT: .long 0x7effffff @ float 1.70141173E+38
+; VFP2-LABEL: test_signed_i128_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r4, lr}
+; VFP2-NEXT: push {r4, lr}
+; VFP2-NEXT: mov r4, r0
+; VFP2-NEXT: bl __fixsfti
+; VFP2-NEXT: vldr s0, .LCPI9_0
+; VFP2-NEXT: vmov s2, r4
+; VFP2-NEXT: vldr s4, .LCPI9_1
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt lt
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: movlt r1, #0
+; VFP2-NEXT: movlt r2, #0
+; VFP2-NEXT: movlt.w r3, #-2147483648
+; VFP2-NEXT: vcmp.f32 s2, s4
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt gt
+; VFP2-NEXT: mvngt r3, #-2147483648
+; VFP2-NEXT: movgt.w r2, #-1
+; VFP2-NEXT: movgt.w r1, #-1
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f32 s2, s2
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt vs
+; VFP2-NEXT: movvs r0, #0
+; VFP2-NEXT: movvs r1, #0
+; VFP2-NEXT: movvs r2, #0
+; VFP2-NEXT: movvs r3, #0
+; VFP2-NEXT: pop {r4, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI9_0:
+; VFP2-NEXT: .long 0xff000000 @ float -1.70141183E+38
+; VFP2-NEXT: .LCPI9_1:
+; VFP2-NEXT: .long 0x7effffff @ float 1.70141173E+38
+;
+; FP16-LABEL: test_signed_i128_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r4, lr}
+; FP16-NEXT: push {r4, lr}
+; FP16-NEXT: mov r4, r0
+; FP16-NEXT: bl __fixsfti
+; FP16-NEXT: vldr s0, .LCPI9_0
+; FP16-NEXT: vmov s2, r4
+; FP16-NEXT: vldr s4, .LCPI9_1
+; FP16-NEXT: vcmp.f32 s2, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s4
+; FP16-NEXT: itttt lt
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: movlt r1, #0
+; FP16-NEXT: movlt r2, #0
+; FP16-NEXT: movlt.w r3, #-2147483648
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it gt
+; FP16-NEXT: mvngt r3, #-2147483648
+; FP16-NEXT: vcmp.f32 s2, s2
+; FP16-NEXT: csinv r2, r2, zr, gt
+; FP16-NEXT: csinv r1, r1, zr, gt
+; FP16-NEXT: csinv r0, r0, zr, gt
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itttt vs
+; FP16-NEXT: movvs r0, #0
+; FP16-NEXT: movvs r1, #0
+; FP16-NEXT: movvs r2, #0
+; FP16-NEXT: movvs r3, #0
+; FP16-NEXT: pop {r4, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI9_0:
+; FP16-NEXT: .long 0xff000000 @ float -1.70141183E+38
+; FP16-NEXT: .LCPI9_1:
+; FP16-NEXT: .long 0x7effffff @ float 1.70141173E+38
%x = call i128 @llvm.fptosi.sat.i128.f32(float %f)
ret i128 %x
}
@@ -1993,10 +2135,10 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
; FP16-NEXT: movlt.w r1, #-2147483648
; FP16-NEXT: vcmp.f64 d1, d2
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itt gt
+; FP16-NEXT: it gt
; FP16-NEXT: mvngt r1, #-2147483648
-; FP16-NEXT: movgt.w r0, #-1
; FP16-NEXT: vcmp.f64 d1, d1
+; FP16-NEXT: csinv r0, r0, zr, gt
; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: itt vs
; FP16-NEXT: movvs r0, #0
@@ -2193,12 +2335,12 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
; FP16-NEXT: mvnlt r3, #7
; FP16-NEXT: vcmp.f64 d1, d2
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itttt gt
+; FP16-NEXT: it gt
; FP16-NEXT: movgt r3, #7
-; FP16-NEXT: movgt.w r2, #-1
-; FP16-NEXT: movgt.w r1, #-1
-; FP16-NEXT: movgt.w r0, #-1
; FP16-NEXT: vcmp.f64 d1, d1
+; FP16-NEXT: csinv r2, r2, zr, gt
+; FP16-NEXT: csinv r1, r1, zr, gt
+; FP16-NEXT: csinv r0, r0, zr, gt
; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: itttt vs
; FP16-NEXT: movvs r0, #0
@@ -2398,12 +2540,12 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
; FP16-NEXT: movlt.w r3, #-2147483648
; FP16-NEXT: vcmp.f64 d1, d2
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itttt gt
+; FP16-NEXT: it gt
; FP16-NEXT: mvngt r3, #-2147483648
-; FP16-NEXT: movgt.w r2, #-1
-; FP16-NEXT: movgt.w r1, #-1
-; FP16-NEXT: movgt.w r0, #-1
; FP16-NEXT: vcmp.f64 d1, d1
+; FP16-NEXT: csinv r2, r2, zr, gt
+; FP16-NEXT: csinv r1, r1, zr, gt
+; FP16-NEXT: csinv r0, r0, zr, gt
; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: itttt vs
; FP16-NEXT: movvs r0, #0
@@ -3147,18 +3289,17 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
; FP16-NEXT: vldr s2, .LCPI26_1
; FP16-NEXT: vcmp.f32 s16, s0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s16, s2
; FP16-NEXT: ittt lt
; FP16-NEXT: movlt r1, #0
; FP16-NEXT: movtlt r1, #65534
; FP16-NEXT: movlt r0, #0
-; FP16-NEXT: vcmp.f32 s16, s2
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: it gt
-; FP16-NEXT: movgt.w r0, #-1
; FP16-NEXT: vcmp.f32 s16, s16
; FP16-NEXT: itt gt
; FP16-NEXT: movwgt r1, #65535
; FP16-NEXT: movtgt r1, #1
+; FP16-NEXT: csinv r0, r0, zr, gt
; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: itt vs
; FP16-NEXT: movvs r0, #0
@@ -3288,15 +3429,15 @@ define i64 @test_signed_i64_f16(half %f) nounwind {
; FP16-NEXT: vldr s2, .LCPI27_1
; FP16-NEXT: vcmp.f32 s16, s0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s16, s2
; FP16-NEXT: itt lt
; FP16-NEXT: movlt r0, #0
; FP16-NEXT: movlt.w r1, #-2147483648
-; FP16-NEXT: vcmp.f32 s16, s2
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itt gt
+; FP16-NEXT: it gt
; FP16-NEXT: mvngt r1, #-2147483648
-; FP16-NEXT: movgt.w r0, #-1
; FP16-NEXT: vcmp.f32 s16, s16
+; FP16-NEXT: csinv r0, r0, zr, gt
; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: itt vs
; FP16-NEXT: movvs r0, #0
@@ -3478,19 +3619,19 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
; FP16-NEXT: vldr s2, .LCPI28_1
; FP16-NEXT: vcmp.f32 s16, s0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s16, s2
; FP16-NEXT: itttt lt
; FP16-NEXT: movlt r0, #0
; FP16-NEXT: movlt r1, #0
; FP16-NEXT: movlt r2, #0
; FP16-NEXT: mvnlt r3, #7
-; FP16-NEXT: vcmp.f32 s16, s2
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itttt gt
+; FP16-NEXT: it gt
; FP16-NEXT: movgt r3, #7
-; FP16-NEXT: movgt.w r2, #-1
-; FP16-NEXT: movgt.w r1, #-1
-; FP16-NEXT: movgt.w r0, #-1
; FP16-NEXT: vcmp.f32 s16, s16
+; FP16-NEXT: csinv r2, r2, zr, gt
+; FP16-NEXT: csinv r1, r1, zr, gt
+; FP16-NEXT: csinv r0, r0, zr, gt
; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: itttt vs
; FP16-NEXT: movvs r0, #0
@@ -3676,19 +3817,19 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; FP16-NEXT: vldr s2, .LCPI29_1
; FP16-NEXT: vcmp.f32 s16, s0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s16, s2
; FP16-NEXT: itttt lt
; FP16-NEXT: movlt r0, #0
; FP16-NEXT: movlt r1, #0
; FP16-NEXT: movlt r2, #0
; FP16-NEXT: movlt.w r3, #-2147483648
-; FP16-NEXT: vcmp.f32 s16, s2
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itttt gt
+; FP16-NEXT: it gt
; FP16-NEXT: mvngt r3, #-2147483648
-; FP16-NEXT: movgt.w r2, #-1
-; FP16-NEXT: movgt.w r1, #-1
-; FP16-NEXT: movgt.w r0, #-1
; FP16-NEXT: vcmp.f32 s16, s16
+; FP16-NEXT: csinv r2, r2, zr, gt
+; FP16-NEXT: csinv r1, r1, zr, gt
+; FP16-NEXT: csinv r0, r0, zr, gt
; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: itttt vs
; FP16-NEXT: movvs r0, #0
diff --git a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
index 4cc5f943dadff..3062ea1433a9f 100644
--- a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
@@ -471,32 +471,59 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI6_1:
; SOFT-NEXT: .long 262143 @ 0x3ffff
;
-; VFP-LABEL: test_signed_i50_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r7, lr}
-; VFP-NEXT: push {r7, lr}
-; VFP-NEXT: .vsave {d8}
-; VFP-NEXT: vpush {d8}
-; VFP-NEXT: vmov s16, r0
-; VFP-NEXT: bl __aeabi_f2ulz
-; VFP-NEXT: vldr s0, .LCPI6_0
-; VFP-NEXT: vcmp.f32 s16, #0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt lt
-; VFP-NEXT: movlt r1, #0
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: vcmp.f32 s16, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: ittt gt
-; VFP-NEXT: movwgt r1, #65535
-; VFP-NEXT: movtgt r1, #3
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vpop {d8}
-; VFP-NEXT: pop {r7, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI6_0:
-; VFP-NEXT: .long 0x587fffff @ float 1.12589984E+15
+; VFP2-LABEL: test_signed_i50_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r7, lr}
+; VFP2-NEXT: push {r7, lr}
+; VFP2-NEXT: .vsave {d8}
+; VFP2-NEXT: vpush {d8}
+; VFP2-NEXT: vmov s16, r0
+; VFP2-NEXT: bl __aeabi_f2ulz
+; VFP2-NEXT: vldr s0, .LCPI6_0
+; VFP2-NEXT: vcmp.f32 s16, #0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt lt
+; VFP2-NEXT: movlt r1, #0
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vcmp.f32 s16, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: ittt gt
+; VFP2-NEXT: movwgt r1, #65535
+; VFP2-NEXT: movtgt r1, #3
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vpop {d8}
+; VFP2-NEXT: pop {r7, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI6_0:
+; VFP2-NEXT: .long 0x587fffff @ float 1.12589984E+15
+;
+; FP16-LABEL: test_signed_i50_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r7, lr}
+; FP16-NEXT: push {r7, lr}
+; FP16-NEXT: .vsave {d8}
+; FP16-NEXT: vpush {d8}
+; FP16-NEXT: vmov s16, r0
+; FP16-NEXT: bl __aeabi_f2ulz
+; FP16-NEXT: vldr s0, .LCPI6_0
+; FP16-NEXT: vcmp.f32 s16, #0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itt lt
+; FP16-NEXT: movlt r1, #0
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: vcmp.f32 s16, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itt gt
+; FP16-NEXT: movwgt r1, #65535
+; FP16-NEXT: movtgt r1, #3
+; FP16-NEXT: csinv r0, r0, zr, gt
+; FP16-NEXT: vpop {d8}
+; FP16-NEXT: pop {r7, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI6_0:
+; FP16-NEXT: .long 0x587fffff @ float 1.12589984E+15
%x = call i50 @llvm.fptoui.sat.i50.f32(float %f)
ret i50 %x
}
@@ -550,31 +577,56 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI7_0:
; SOFT-NEXT: .long 1602224127 @ 0x5f7fffff
;
-; VFP-LABEL: test_signed_i64_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r7, lr}
-; VFP-NEXT: push {r7, lr}
-; VFP-NEXT: .vsave {d8}
-; VFP-NEXT: vpush {d8}
-; VFP-NEXT: vmov s16, r0
-; VFP-NEXT: bl __aeabi_f2ulz
-; VFP-NEXT: vldr s0, .LCPI7_0
-; VFP-NEXT: vcmp.f32 s16, #0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt lt
-; VFP-NEXT: movlt r1, #0
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: vcmp.f32 s16, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt gt
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: movgt.w r1, #-1
-; VFP-NEXT: vpop {d8}
-; VFP-NEXT: pop {r7, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI7_0:
-; VFP-NEXT: .long 0x5f7fffff @ float 1.8446743E+19
+; VFP2-LABEL: test_signed_i64_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r7, lr}
+; VFP2-NEXT: push {r7, lr}
+; VFP2-NEXT: .vsave {d8}
+; VFP2-NEXT: vpush {d8}
+; VFP2-NEXT: vmov s16, r0
+; VFP2-NEXT: bl __aeabi_f2ulz
+; VFP2-NEXT: vldr s0, .LCPI7_0
+; VFP2-NEXT: vcmp.f32 s16, #0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt lt
+; VFP2-NEXT: movlt r1, #0
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vcmp.f32 s16, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt gt
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: movgt.w r1, #-1
+; VFP2-NEXT: vpop {d8}
+; VFP2-NEXT: pop {r7, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI7_0:
+; VFP2-NEXT: .long 0x5f7fffff @ float 1.8446743E+19
+;
+; FP16-LABEL: test_signed_i64_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r7, lr}
+; FP16-NEXT: push {r7, lr}
+; FP16-NEXT: .vsave {d8}
+; FP16-NEXT: vpush {d8}
+; FP16-NEXT: vmov s16, r0
+; FP16-NEXT: bl __aeabi_f2ulz
+; FP16-NEXT: vldr s0, .LCPI7_0
+; FP16-NEXT: vcmp.f32 s16, #0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itt lt
+; FP16-NEXT: movlt r1, #0
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: vcmp.f32 s16, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: csinv r0, r0, zr, le
+; FP16-NEXT: csinv r1, r1, zr, le
+; FP16-NEXT: vpop {d8}
+; FP16-NEXT: pop {r7, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI7_0:
+; FP16-NEXT: .long 0x5f7fffff @ float 1.8446743E+19
%x = call i64 @llvm.fptoui.sat.i64.f32(float %f)
ret i64 %x
}
@@ -654,35 +706,65 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI8_0:
; SOFT-NEXT: .long 1904214015 @ 0x717fffff
;
-; VFP-LABEL: test_signed_i100_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r7, lr}
-; VFP-NEXT: push {r7, lr}
-; VFP-NEXT: .vsave {d8}
-; VFP-NEXT: vpush {d8}
-; VFP-NEXT: vmov s16, r0
-; VFP-NEXT: bl __fixunssfti
-; VFP-NEXT: vldr s0, .LCPI8_0
-; VFP-NEXT: vcmp.f32 s16, #0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt lt
-; VFP-NEXT: movlt r3, #0
-; VFP-NEXT: movlt r2, #0
-; VFP-NEXT: movlt r1, #0
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: vcmp.f32 s16, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt gt
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: movgt.w r1, #-1
-; VFP-NEXT: movgt.w r2, #-1
-; VFP-NEXT: movgt r3, #15
-; VFP-NEXT: vpop {d8}
-; VFP-NEXT: pop {r7, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI8_0:
-; VFP-NEXT: .long 0x717fffff @ float 1.26765052E+30
+; VFP2-LABEL: test_signed_i100_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r7, lr}
+; VFP2-NEXT: push {r7, lr}
+; VFP2-NEXT: .vsave {d8}
+; VFP2-NEXT: vpush {d8}
+; VFP2-NEXT: vmov s16, r0
+; VFP2-NEXT: bl __fixunssfti
+; VFP2-NEXT: vldr s0, .LCPI8_0
+; VFP2-NEXT: vcmp.f32 s16, #0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt lt
+; VFP2-NEXT: movlt r3, #0
+; VFP2-NEXT: movlt r2, #0
+; VFP2-NEXT: movlt r1, #0
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vcmp.f32 s16, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt gt
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: movgt.w r1, #-1
+; VFP2-NEXT: movgt.w r2, #-1
+; VFP2-NEXT: movgt r3, #15
+; VFP2-NEXT: vpop {d8}
+; VFP2-NEXT: pop {r7, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI8_0:
+; VFP2-NEXT: .long 0x717fffff @ float 1.26765052E+30
+;
+; FP16-LABEL: test_signed_i100_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r7, lr}
+; FP16-NEXT: push {r7, lr}
+; FP16-NEXT: .vsave {d8}
+; FP16-NEXT: vpush {d8}
+; FP16-NEXT: vmov s16, r0
+; FP16-NEXT: bl __fixunssfti
+; FP16-NEXT: vldr s0, .LCPI8_0
+; FP16-NEXT: vcmp.f32 s16, #0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itttt lt
+; FP16-NEXT: movlt r2, #0
+; FP16-NEXT: movlt r1, #0
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: movlt r3, #0
+; FP16-NEXT: vcmp.f32 s16, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it gt
+; FP16-NEXT: movgt r3, #15
+; FP16-NEXT: csinv r0, r0, zr, le
+; FP16-NEXT: csinv r1, r1, zr, le
+; FP16-NEXT: csinv r2, r2, zr, le
+; FP16-NEXT: vpop {d8}
+; FP16-NEXT: pop {r7, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI8_0:
+; FP16-NEXT: .long 0x717fffff @ float 1.26765052E+30
%x = call i100 @llvm.fptoui.sat.i100.f32(float %f)
ret i100 %x
}
@@ -763,35 +845,64 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI9_0:
; SOFT-NEXT: .long 2139095039 @ 0x7f7fffff
;
-; VFP-LABEL: test_signed_i128_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r7, lr}
-; VFP-NEXT: push {r7, lr}
-; VFP-NEXT: .vsave {d8}
-; VFP-NEXT: vpush {d8}
-; VFP-NEXT: vmov s16, r0
-; VFP-NEXT: bl __fixunssfti
-; VFP-NEXT: vldr s0, .LCPI9_0
-; VFP-NEXT: vcmp.f32 s16, #0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt lt
-; VFP-NEXT: movlt r3, #0
-; VFP-NEXT: movlt r2, #0
-; VFP-NEXT: movlt r1, #0
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: vcmp.f32 s16, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt gt
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: movgt.w r1, #-1
-; VFP-NEXT: movgt.w r2, #-1
-; VFP-NEXT: movgt.w r3, #-1
-; VFP-NEXT: vpop {d8}
-; VFP-NEXT: pop {r7, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI9_0:
-; VFP-NEXT: .long 0x7f7fffff @ float 3.40282347E+38
+; VFP2-LABEL: test_signed_i128_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r7, lr}
+; VFP2-NEXT: push {r7, lr}
+; VFP2-NEXT: .vsave {d8}
+; VFP2-NEXT: vpush {d8}
+; VFP2-NEXT: vmov s16, r0
+; VFP2-NEXT: bl __fixunssfti
+; VFP2-NEXT: vldr s0, .LCPI9_0
+; VFP2-NEXT: vcmp.f32 s16, #0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt lt
+; VFP2-NEXT: movlt r3, #0
+; VFP2-NEXT: movlt r2, #0
+; VFP2-NEXT: movlt r1, #0
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vcmp.f32 s16, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt gt
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: movgt.w r1, #-1
+; VFP2-NEXT: movgt.w r2, #-1
+; VFP2-NEXT: movgt.w r3, #-1
+; VFP2-NEXT: vpop {d8}
+; VFP2-NEXT: pop {r7, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI9_0:
+; VFP2-NEXT: .long 0x7f7fffff @ float 3.40282347E+38
+;
+; FP16-LABEL: test_signed_i128_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r7, lr}
+; FP16-NEXT: push {r7, lr}
+; FP16-NEXT: .vsave {d8}
+; FP16-NEXT: vpush {d8}
+; FP16-NEXT: vmov s16, r0
+; FP16-NEXT: bl __fixunssfti
+; FP16-NEXT: vldr s0, .LCPI9_0
+; FP16-NEXT: vcmp.f32 s16, #0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itttt lt
+; FP16-NEXT: movlt r3, #0
+; FP16-NEXT: movlt r2, #0
+; FP16-NEXT: movlt r1, #0
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: vcmp.f32 s16, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: csinv r0, r0, zr, le
+; FP16-NEXT: csinv r1, r1, zr, le
+; FP16-NEXT: csinv r2, r2, zr, le
+; FP16-NEXT: csinv r3, r3, zr, le
+; FP16-NEXT: vpop {d8}
+; FP16-NEXT: pop {r7, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI9_0:
+; FP16-NEXT: .long 0x7f7fffff @ float 3.40282347E+38
%x = call i128 @llvm.fptoui.sat.i128.f32(float %f)
ret i128 %x
}
@@ -1508,9 +1619,8 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
; FP16-NEXT: movlt r0, #0
; FP16-NEXT: vcmp.f64 d8, d0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itt gt
-; FP16-NEXT: movgt.w r0, #-1
-; FP16-NEXT: movgt.w r1, #-1
+; FP16-NEXT: csinv r0, r0, zr, le
+; FP16-NEXT: csinv r1, r1, zr, le
; FP16-NEXT: vpop {d8}
; FP16-NEXT: pop {r7, pc}
; FP16-NEXT: .p2align 3
@@ -1645,17 +1755,17 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
; FP16-NEXT: vldr d0, .LCPI18_0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: itttt lt
-; FP16-NEXT: movlt r3, #0
; FP16-NEXT: movlt r2, #0
; FP16-NEXT: movlt r1, #0
; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: movlt r3, #0
; FP16-NEXT: vcmp.f64 d8, d0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itttt gt
-; FP16-NEXT: movgt.w r0, #-1
-; FP16-NEXT: movgt.w r1, #-1
-; FP16-NEXT: movgt.w r2, #-1
+; FP16-NEXT: it gt
; FP16-NEXT: movgt r3, #15
+; FP16-NEXT: csinv r0, r0, zr, le
+; FP16-NEXT: csinv r1, r1, zr, le
+; FP16-NEXT: csinv r2, r2, zr, le
; FP16-NEXT: vpop {d8}
; FP16-NEXT: pop {r7, pc}
; FP16-NEXT: .p2align 3
@@ -1797,11 +1907,10 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
; FP16-NEXT: movlt r0, #0
; FP16-NEXT: vcmp.f64 d8, d0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itttt gt
-; FP16-NEXT: movgt.w r0, #-1
-; FP16-NEXT: movgt.w r1, #-1
-; FP16-NEXT: movgt.w r2, #-1
-; FP16-NEXT: movgt.w r3, #-1
+; FP16-NEXT: csinv r0, r0, zr, le
+; FP16-NEXT: csinv r1, r1, zr, le
+; FP16-NEXT: csinv r2, r2, zr, le
+; FP16-NEXT: csinv r3, r3, zr, le
; FP16-NEXT: vpop {d8}
; FP16-NEXT: pop {r7, pc}
; FP16-NEXT: .p2align 3
@@ -2369,10 +2478,10 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
; FP16-NEXT: movlt r0, #0
; FP16-NEXT: vcmp.f32 s16, s0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: ittt gt
+; FP16-NEXT: itt gt
; FP16-NEXT: movwgt r1, #65535
; FP16-NEXT: movtgt r1, #3
-; FP16-NEXT: movgt.w r0, #-1
+; FP16-NEXT: csinv r0, r0, zr, le
; FP16-NEXT: vpop {d8}
; FP16-NEXT: pop {r7, pc}
; FP16-NEXT: .p2align 2
@@ -2477,9 +2586,8 @@ define i64 @test_signed_i64_f16(half %f) nounwind {
; FP16-NEXT: movlt r0, #0
; FP16-NEXT: vcmp.f32 s16, s0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itt gt
-; FP16-NEXT: movgt.w r0, #-1
-; FP16-NEXT: movgt.w r1, #-1
+; FP16-NEXT: csinv r0, r0, zr, le
+; FP16-NEXT: csinv r1, r1, zr, le
; FP16-NEXT: vpop {d8}
; FP16-NEXT: pop {r7, pc}
; FP16-NEXT: .p2align 2
@@ -2610,17 +2718,17 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
; FP16-NEXT: vcmp.f32 s16, #0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: itttt lt
-; FP16-NEXT: movlt r3, #0
; FP16-NEXT: movlt r2, #0
; FP16-NEXT: movlt r1, #0
; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: movlt r3, #0
; FP16-NEXT: vcmp.f32 s16, s0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itttt gt
-; FP16-NEXT: movgt.w r0, #-1
-; FP16-NEXT: movgt.w r1, #-1
-; FP16-NEXT: movgt.w r2, #-1
+; FP16-NEXT: it gt
; FP16-NEXT: movgt r3, #15
+; FP16-NEXT: csinv r0, r0, zr, le
+; FP16-NEXT: csinv r1, r1, zr, le
+; FP16-NEXT: csinv r2, r2, zr, le
; FP16-NEXT: vpop {d8}
; FP16-NEXT: pop {r7, pc}
; FP16-NEXT: .p2align 2
@@ -2758,11 +2866,10 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; FP16-NEXT: movlt r0, #0
; FP16-NEXT: vcmp.f32 s16, s0
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: itttt gt
-; FP16-NEXT: movgt.w r0, #-1
-; FP16-NEXT: movgt.w r1, #-1
-; FP16-NEXT: movgt.w r2, #-1
-; FP16-NEXT: movgt.w r3, #-1
+; FP16-NEXT: csinv r0, r0, zr, le
+; FP16-NEXT: csinv r1, r1, zr, le
+; FP16-NEXT: csinv r2, r2, zr, le
+; FP16-NEXT: csinv r3, r3, zr, le
; FP16-NEXT: vpop {d8}
; FP16-NEXT: pop {r7, pc}
; FP16-NEXT: .p2align 2
diff --git a/llvm/test/CodeGen/Thumb/scmp.ll b/llvm/test/CodeGen/Thumb/scmp.ll
index c0024492b3a6d..297a9987e7576 100644
--- a/llvm/test/CodeGen/Thumb/scmp.ll
+++ b/llvm/test/CodeGen/Thumb/scmp.ll
@@ -37,8 +37,7 @@ define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
; V81M: @ %bb.0:
; V81M-NEXT: cmp r0, r1
; V81M-NEXT: cset r0, gt
-; V81M-NEXT: it lt
-; V81M-NEXT: movlt.w r0, #-1
+; V81M-NEXT: csinv r0, r0, zr, ge
; V81M-NEXT: bx lr
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
@@ -78,8 +77,7 @@ define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
; V81M: @ %bb.0:
; V81M-NEXT: cmp r0, r1
; V81M-NEXT: cset r0, gt
-; V81M-NEXT: it lt
-; V81M-NEXT: movlt.w r0, #-1
+; V81M-NEXT: csinv r0, r0, zr, ge
; V81M-NEXT: bx lr
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
@@ -119,8 +117,7 @@ define i8 @scmp_8_32(i32 %x, i32 %y) nounwind {
; V81M: @ %bb.0:
; V81M-NEXT: cmp r0, r1
; V81M-NEXT: cset r0, gt
-; V81M-NEXT: it lt
-; V81M-NEXT: movlt.w r0, #-1
+; V81M-NEXT: csinv r0, r0, zr, ge
; V81M-NEXT: bx lr
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
@@ -303,8 +300,7 @@ define i32 @scmp_32_32(i32 %x, i32 %y) nounwind {
; V81M: @ %bb.0:
; V81M-NEXT: cmp r0, r1
; V81M-NEXT: cset r0, gt
-; V81M-NEXT: it lt
-; V81M-NEXT: movlt.w r0, #-1
+; V81M-NEXT: csinv r0, r0, zr, ge
; V81M-NEXT: bx lr
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
diff --git a/llvm/test/CodeGen/Thumb/ucmp.ll b/llvm/test/CodeGen/Thumb/ucmp.ll
index 5d0f57e2a9d72..d34a2127d5141 100644
--- a/llvm/test/CodeGen/Thumb/ucmp.ll
+++ b/llvm/test/CodeGen/Thumb/ucmp.ll
@@ -26,8 +26,7 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
; V81M: @ %bb.0:
; V81M-NEXT: cmp r0, r1
; V81M-NEXT: cset r0, hi
-; V81M-NEXT: it lo
-; V81M-NEXT: movlo.w r0, #-1
+; V81M-NEXT: csinv r0, r0, zr, hs
; V81M-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
@@ -56,8 +55,7 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
; V81M: @ %bb.0:
; V81M-NEXT: cmp r0, r1
; V81M-NEXT: cset r0, hi
-; V81M-NEXT: it lo
-; V81M-NEXT: movlo.w r0, #-1
+; V81M-NEXT: csinv r0, r0, zr, hs
; V81M-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
@@ -86,8 +84,7 @@ define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
; V81M: @ %bb.0:
; V81M-NEXT: cmp r0, r1
; V81M-NEXT: cset r0, hi
-; V81M-NEXT: it lo
-; V81M-NEXT: movlo.w r0, #-1
+; V81M-NEXT: csinv r0, r0, zr, hs
; V81M-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
@@ -259,8 +256,7 @@ define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
; V81M: @ %bb.0:
; V81M-NEXT: cmp r0, r1
; V81M-NEXT: cset r0, hi
-; V81M-NEXT: it lo
-; V81M-NEXT: movlo.w r0, #-1
+; V81M-NEXT: csinv r0, r0, zr, hs
; V81M-NEXT: bx lr
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
index 6f986ce28381b..df51c81345057 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
@@ -630,7 +630,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr
define i32 @wrongop(ptr nocapture readonly %pd) {
; CHECK-LABEL: wrongop:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: movw r12, #47184
; CHECK-NEXT: movw r3, #23593
@@ -639,34 +639,34 @@ define i32 @wrongop(ptr nocapture readonly %pd) {
; CHECK-NEXT: movt r3, #49807
; CHECK-NEXT: mla r3, lr, r3, r12
; CHECK-NEXT: movw r1, #55051
-; CHECK-NEXT: movw r4, #23593
; CHECK-NEXT: movt r1, #163
; CHECK-NEXT: ldr r0, [r0]
-; CHECK-NEXT: movt r4, #655
; CHECK-NEXT: ror.w r12, r3, #4
; CHECK-NEXT: cmp r12, r1
+; CHECK-NEXT: ror.w r12, r3, #2
+; CHECK-NEXT: movw r3, #23593
; CHECK-NEXT: cset r1, lo
-; CHECK-NEXT: ror.w r3, r3, #2
-; CHECK-NEXT: mov.w r12, #1
-; CHECK-NEXT: cmp r3, r4
-; CHECK-NEXT: csel r3, r1, r12, lo
-; CHECK-NEXT: lsls.w r4, lr, #30
-; CHECK-NEXT: csel r1, r1, r3, ne
+; CHECK-NEXT: movt r3, #655
+; CHECK-NEXT: cmp r12, r3
+; CHECK-NEXT: csinc r12, r1, zr, lo
+; CHECK-NEXT: lsls.w r3, lr, #30
+; CHECK-NEXT: csel r1, r1, r12, ne
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
-; CHECK-NEXT: poplt {r4, pc}
+; CHECK-NEXT: poplt {r7, pc}
; CHECK-NEXT: .LBB8_1: @ %vector.ph
-; CHECK-NEXT: movw r3, :lower16:days
-; CHECK-NEXT: movs r4, #52
-; CHECK-NEXT: movt r3, :upper16:days
-; CHECK-NEXT: smlabb r1, r1, r4, r3
+; CHECK-NEXT: movw r12, :lower16:days
+; CHECK-NEXT: movs r3, #52
+; CHECK-NEXT: movt r12, :upper16:days
+; CHECK-NEXT: smlabb r1, r1, r3, r12
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vdup.32 q0, r3
+; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: adds r0, r2, #3
; CHECK-NEXT: bic r0, r0, #3
; CHECK-NEXT: subs r0, #4
-; CHECK-NEXT: add.w r0, r12, r0, lsr #2
+; CHECK-NEXT: add.w r0, r3, r0, lsr #2
; CHECK-NEXT: dls lr, r0
; CHECK-NEXT: .LBB8_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
@@ -680,7 +680,7 @@ define i32 @wrongop(ptr nocapture readonly %pd) {
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u32 r0, q0
-; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: pop {r7, pc}
entry:
%day1 = getelementptr inbounds %struct.date, ptr %pd, i32 0, i32 0
%0 = load i32, ptr %day1, align 4
diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
index d076cb00ad7e0..3ec639e459401 100644
--- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
@@ -362,31 +362,31 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: blt.w .LBB1_28
; CHECK-NEXT: @ %bb.1: @ %for.cond2.preheader.lr.ph
-; CHECK-NEXT: movs r0, #1
; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: csel r7, r2, r0, lt
; CHECK-NEXT: mov r12, r1
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: csinc r7, r2, zr, lt
+; CHECK-NEXT: movw r1, #43691
+; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: cmp r7, #3
; CHECK-NEXT: it ls
-; CHECK-NEXT: movls r1, #3
+; CHECK-NEXT: movls r0, #3
+; CHECK-NEXT: movt r1, #43690
+; CHECK-NEXT: subs r0, r0, r7
; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: subs r1, r1, r7
-; CHECK-NEXT: movw r2, #43691
-; CHECK-NEXT: adds r1, #2
-; CHECK-NEXT: movt r2, #43690
+; CHECK-NEXT: adds r0, #2
+; CHECK-NEXT: movs r2, #4
; CHECK-NEXT: ldr r6, [sp, #128]
; CHECK-NEXT: movw r8, :lower16:c
-; CHECK-NEXT: umull r1, r2, r1, r2
+; CHECK-NEXT: umull r0, r1, r0, r1
; CHECK-NEXT: movt r8, :upper16:c
-; CHECK-NEXT: movs r1, #4
+; CHECK-NEXT: movs r0, #1
; CHECK-NEXT: @ implicit-def: $r10
; CHECK-NEXT: @ implicit-def: $r5
; CHECK-NEXT: @ implicit-def: $r11
; CHECK-NEXT: mov.w r9, #12
; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: add.w r1, r1, r2, lsr #1
-; CHECK-NEXT: add.w r0, r0, r2, lsr #1
+; CHECK-NEXT: add.w r0, r0, r1, lsr #1
+; CHECK-NEXT: add.w r1, r2, r1, lsr #1
; CHECK-NEXT: bic r3, r1, #3
; CHECK-NEXT: adr r1, .LCPI1_0
; CHECK-NEXT: vldrw.u32 q0, [r1]
diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index 742f2a75a1aa8..e8895bedc78a2 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -684,17 +684,17 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-NEXT: sbcs r5, r2, #0
; CHECK-NEXT: mov.w r7, #-2147483648
; CHECK-NEXT: sbcs r5, r3, #0
+; CHECK-NEXT: mov.w r6, #-1
; CHECK-NEXT: cset r5, lt
; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: csinv r0, r0, zr, ne
; CHECK-NEXT: csel r3, r3, r5, ne
; CHECK-NEXT: csel r2, r2, r5, ne
-; CHECK-NEXT: mov.w r5, #-1
; CHECK-NEXT: csel r1, r1, r4, ne
-; CHECK-NEXT: csel r0, r0, r5, ne
-; CHECK-NEXT: rsbs r6, r0, #0
-; CHECK-NEXT: sbcs.w r6, r7, r1
-; CHECK-NEXT: sbcs.w r2, r5, r2
-; CHECK-NEXT: sbcs.w r2, r5, r3
+; CHECK-NEXT: rsbs r5, r0, #0
+; CHECK-NEXT: sbcs.w r5, r7, r1
+; CHECK-NEXT: sbcs.w r2, r6, r2
+; CHECK-NEXT: sbcs.w r2, r6, r3
; CHECK-NEXT: csel r8, r1, r7, lt
; CHECK-NEXT: cset r1, lt
; CHECK-NEXT: cmp r1, #0
@@ -702,20 +702,20 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: subs.w r6, r0, #-1
-; CHECK-NEXT: sbcs.w r6, r1, r4
-; CHECK-NEXT: sbcs r6, r2, #0
-; CHECK-NEXT: sbcs r6, r3, #0
-; CHECK-NEXT: cset r6, lt
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r0, r0, r5, ne
-; CHECK-NEXT: csel r3, r3, r6, ne
-; CHECK-NEXT: csel r2, r2, r6, ne
+; CHECK-NEXT: subs.w r5, r0, #-1
+; CHECK-NEXT: sbcs.w r5, r1, r4
+; CHECK-NEXT: sbcs r5, r2, #0
+; CHECK-NEXT: sbcs r5, r3, #0
+; CHECK-NEXT: cset r5, lt
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: csinv r0, r0, zr, ne
+; CHECK-NEXT: csel r3, r3, r5, ne
+; CHECK-NEXT: csel r2, r2, r5, ne
; CHECK-NEXT: csel r1, r1, r4, ne
-; CHECK-NEXT: rsbs r6, r0, #0
-; CHECK-NEXT: sbcs.w r6, r7, r1
-; CHECK-NEXT: sbcs.w r2, r5, r2
-; CHECK-NEXT: sbcs.w r2, r5, r3
+; CHECK-NEXT: rsbs r5, r0, #0
+; CHECK-NEXT: sbcs.w r5, r7, r1
+; CHECK-NEXT: sbcs.w r2, r6, r2
+; CHECK-NEXT: sbcs.w r2, r6, r3
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: csel r1, r1, r7, lt
; CHECK-NEXT: cmp r2, #0
@@ -776,8 +776,10 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
@@ -786,13 +788,12 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-NEXT: vmov r12, lr, d8
; CHECK-NEXT: subs r4, r2, #1
; CHECK-NEXT: sbcs r4, r3, #0
-; CHECK-NEXT: mov.w r8, #1
; CHECK-NEXT: cset r4, lt
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: csel r0, r0, r4, ne
; CHECK-NEXT: csel r3, r3, r4, ne
; CHECK-NEXT: csel r1, r1, r4, ne
-; CHECK-NEXT: csel r2, r2, r8, ne
+; CHECK-NEXT: csinc r2, r2, zr, ne
; CHECK-NEXT: rsbs r5, r0, #0
; CHECK-NEXT: mov.w r4, #0
; CHECK-NEXT: sbcs.w r5, r4, r1
@@ -800,31 +801,32 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-NEXT: sbcs.w r2, r4, r3
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r5, r1, r2, ne
-; CHECK-NEXT: csel r7, r0, r2, ne
+; CHECK-NEXT: csel r7, r1, r2, ne
+; CHECK-NEXT: csel r6, r0, r2, ne
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: subs r6, r2, #1
-; CHECK-NEXT: sbcs r6, r3, #0
-; CHECK-NEXT: cset r6, lt
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r0, r0, r6, ne
-; CHECK-NEXT: csel r3, r3, r6, ne
-; CHECK-NEXT: csel r1, r1, r6, ne
-; CHECK-NEXT: csel r2, r2, r8, ne
-; CHECK-NEXT: rsbs r6, r0, #0
-; CHECK-NEXT: sbcs.w r6, r4, r1
+; CHECK-NEXT: subs r5, r2, #1
+; CHECK-NEXT: sbcs r5, r3, #0
+; CHECK-NEXT: cset r5, lt
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: csel r0, r0, r5, ne
+; CHECK-NEXT: csel r3, r3, r5, ne
+; CHECK-NEXT: csel r1, r1, r5, ne
+; CHECK-NEXT: csinc r2, r2, zr, ne
+; CHECK-NEXT: rsbs r5, r0, #0
+; CHECK-NEXT: sbcs.w r5, r4, r1
; CHECK-NEXT: sbcs.w r2, r4, r2
; CHECK-NEXT: sbcs.w r2, r4, r3
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
; CHECK-NEXT: csel r1, r1, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r7
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r7
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -845,21 +847,21 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-NEXT: subs.w r7, r0, #-1
; CHECK-NEXT: mvn r5, #-2147483648
; CHECK-NEXT: sbcs.w r7, r1, r5
-; CHECK-NEXT: mov.w r6, #-1
+; CHECK-NEXT: mov.w r6, #-2147483648
; CHECK-NEXT: sbcs r7, r2, #0
; CHECK-NEXT: sbcs r7, r3, #0
; CHECK-NEXT: cset r7, lt
; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: csel r0, r0, r6, ne
+; CHECK-NEXT: csinv r0, r0, zr, ne
; CHECK-NEXT: csel r3, r3, r7, ne
; CHECK-NEXT: csel r2, r2, r7, ne
; CHECK-NEXT: csel r1, r1, r5, ne
; CHECK-NEXT: rsbs r4, r0, #0
-; CHECK-NEXT: mov.w r7, #-2147483648
-; CHECK-NEXT: sbcs.w r4, r7, r1
-; CHECK-NEXT: sbcs.w r2, r6, r2
-; CHECK-NEXT: sbcs.w r2, r6, r3
-; CHECK-NEXT: csel r8, r1, r7, lt
+; CHECK-NEXT: mov.w r7, #-1
+; CHECK-NEXT: sbcs.w r4, r6, r1
+; CHECK-NEXT: sbcs.w r2, r7, r2
+; CHECK-NEXT: sbcs.w r2, r7, r3
+; CHECK-NEXT: csel r8, r1, r6, lt
; CHECK-NEXT: cset r1, lt
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csel r10, r0, r1, ne
@@ -871,16 +873,16 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-NEXT: sbcs r4, r3, #0
; CHECK-NEXT: cset r4, lt
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: csel r0, r0, r6, ne
+; CHECK-NEXT: csinv r0, r0, zr, ne
; CHECK-NEXT: csel r3, r3, r4, ne
; CHECK-NEXT: csel r2, r2, r4, ne
; CHECK-NEXT: csel r1, r1, r5, ne
; CHECK-NEXT: rsbs r5, r0, #0
-; CHECK-NEXT: sbcs.w r5, r7, r1
-; CHECK-NEXT: sbcs.w r2, r6, r2
-; CHECK-NEXT: sbcs.w r2, r6, r3
+; CHECK-NEXT: sbcs.w r5, r6, r1
+; CHECK-NEXT: sbcs.w r2, r7, r2
+; CHECK-NEXT: sbcs.w r2, r7, r3
; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: csel r1, r1, r7, lt
+; CHECK-NEXT: csel r1, r1, r6, lt
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
; CHECK-NEXT: vmov q0[2], q0[0], r10, r0
@@ -931,49 +933,51 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-LABEL: ustest_f32i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: vmov r5, r0, d0
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vmov r6, r0, d0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: subs r4, r2, #1
-; CHECK-NEXT: mov.w r8, #1
; CHECK-NEXT: sbcs r4, r3, #0
-; CHECK-NEXT: mov.w r6, #0
; CHECK-NEXT: cset r4, lt
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: csel r0, r0, r4, ne
; CHECK-NEXT: csel r3, r3, r4, ne
; CHECK-NEXT: csel r1, r1, r4, ne
-; CHECK-NEXT: csel r2, r2, r8, ne
-; CHECK-NEXT: rsbs r4, r0, #0
-; CHECK-NEXT: sbcs.w r4, r6, r1
-; CHECK-NEXT: sbcs.w r2, r6, r2
-; CHECK-NEXT: sbcs.w r2, r6, r3
+; CHECK-NEXT: csinc r2, r2, zr, ne
+; CHECK-NEXT: rsbs r5, r0, #0
+; CHECK-NEXT: mov.w r4, #0
+; CHECK-NEXT: sbcs.w r5, r4, r1
+; CHECK-NEXT: sbcs.w r2, r4, r2
+; CHECK-NEXT: sbcs.w r2, r4, r3
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r7, r0, r2, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: csel r4, r1, r2, ne
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: csel r5, r1, r2, ne
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: subs r5, r2, #1
-; CHECK-NEXT: sbcs r5, r3, #0
-; CHECK-NEXT: cset r5, lt
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r0, r0, r5, ne
-; CHECK-NEXT: csel r3, r3, r5, ne
-; CHECK-NEXT: csel r1, r1, r5, ne
-; CHECK-NEXT: csel r2, r2, r8, ne
-; CHECK-NEXT: rsbs r5, r0, #0
-; CHECK-NEXT: sbcs.w r5, r6, r1
-; CHECK-NEXT: sbcs.w r2, r6, r2
-; CHECK-NEXT: sbcs.w r2, r6, r3
+; CHECK-NEXT: subs r6, r2, #1
+; CHECK-NEXT: sbcs r6, r3, #0
+; CHECK-NEXT: cset r6, lt
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r0, r0, r6, ne
+; CHECK-NEXT: csel r3, r3, r6, ne
+; CHECK-NEXT: csel r1, r1, r6, ne
+; CHECK-NEXT: csinc r2, r2, zr, ne
+; CHECK-NEXT: rsbs r6, r0, #0
+; CHECK-NEXT: sbcs.w r6, r4, r1
+; CHECK-NEXT: sbcs.w r2, r4, r2
+; CHECK-NEXT: sbcs.w r2, r4, r3
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
; CHECK-NEXT: csel r1, r1, r2, ne
; CHECK-NEXT: vmov q0[2], q0[0], r0, r7
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1741,7 +1745,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: sbcs r5, r3, #0
; CHECK-NEXT: cset r5, lt
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r0, r0, r6, ne
+; CHECK-NEXT: csinv r0, r0, zr, ne
; CHECK-NEXT: csel r3, r3, r5, ne
; CHECK-NEXT: csel r2, r2, r5, ne
; CHECK-NEXT: csel r1, r1, r4, ne
@@ -1763,7 +1767,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: sbcs r7, r3, #0
; CHECK-NEXT: cset r7, lt
; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: csel r0, r0, r6, ne
+; CHECK-NEXT: csinv r0, r0, zr, ne
; CHECK-NEXT: csel r3, r3, r7, ne
; CHECK-NEXT: csel r2, r2, r7, ne
; CHECK-NEXT: csel r1, r1, r4, ne
@@ -1888,12 +1892,12 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: sbcs r7, r3, #0
; CHECK-NEXT: cset r7, lt
; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csinv r0, r0, zr, ne
; CHECK-NEXT: csel r3, r3, r7, ne
; CHECK-NEXT: csel r2, r2, r7, ne
-; CHECK-NEXT: mov.w r7, #-1
; CHECK-NEXT: csel r1, r1, r5, ne
-; CHECK-NEXT: csel r0, r0, r7, ne
; CHECK-NEXT: rsbs r4, r0, #0
+; CHECK-NEXT: mov.w r7, #-1
; CHECK-NEXT: sbcs.w r4, r6, r1
; CHECK-NEXT: sbcs.w r2, r7, r2
; CHECK-NEXT: sbcs.w r2, r7, r3
@@ -1909,7 +1913,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: sbcs r4, r3, #0
; CHECK-NEXT: cset r4, lt
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: csel r0, r0, r7, ne
+; CHECK-NEXT: csinv r0, r0, zr, ne
; CHECK-NEXT: csel r3, r3, r4, ne
; CHECK-NEXT: csel r2, r2, r4, ne
; CHECK-NEXT: csel r1, r1, r5, ne
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
index 77548b49d77f2..602d07dfdc366 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
@@ -35,33 +35,32 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f32_v2i32(<2 x float> %f) {
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vldr s18, .LCPI1_0
-; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vldr s20, .LCPI1_1
; CHECK-NEXT: vcmp.f32 s17, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r4, #-1
-; CHECK-NEXT: movlt.w r5, #-2147483648
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt.w r4, #-2147483648
; CHECK-NEXT: vcmp.f32 s17, s20
+; CHECK-NEXT: csinv r5, r1, zr, ge
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
-; CHECK-NEXT: mvngt r5, #-2147483648
-; CHECK-NEXT: movgt r4, #0
+; CHECK-NEXT: mvngt r4, #-2147483648
+; CHECK-NEXT: movgt r5, #0
; CHECK-NEXT: vcmp.f32 s17, s17
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs r4, #0
; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: movvs r4, #0
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: vcmp.f32 s16, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
+; CHECK-NEXT: it lt
; CHECK-NEXT: movlt.w r0, #-2147483648
-; CHECK-NEXT: movlt.w r1, #-1
; CHECK-NEXT: vcmp.f32 s16, s20
+; CHECK-NEXT: csinv r1, r1, zr, ge
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
; CHECK-NEXT: movgt r1, #0
@@ -69,10 +68,10 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f32_v2i32(<2 x float> %f) {
; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r4
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
; CHECK-NEXT: vpop {d8, d9, d10}
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: .p2align 2
@@ -339,82 +338,80 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI9_0
-; CHECK-NEXT: vmov r9, r8, d9
-; CHECK-NEXT: vmov r7, r3, d0
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: vmov r8, r7, d9
+; CHECK-NEXT: vmov r11, r5, d0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vldr d0, .LCPI9_1
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: csinv r9, r1, zr, eq
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: vmov r11, r10, d8
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: moveq.w r4, #-1
-; CHECK-NEXT: moveq.w r5, #-2147483648
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: vmov r6, r10, d8
+; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq.w r4, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: itt ne
-; CHECK-NEXT: mvnne r5, #-2147483648
-; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: mvnne r4, #-2147483648
+; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: itt eq
+; CHECK-NEXT: it eq
; CHECK-NEXT: moveq.w r7, #-2147483648
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT: csinv r5, r1, zr, eq
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: movne r5, #0
; CHECK-NEXT: mvnne r7, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne r5, #0
; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vmov q0[2], q0[0], r7, r4
+; CHECK-NEXT: vmov q0[3], q0[1], r5, r9
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -1212,34 +1209,33 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) {
; CHECK-NEXT: vcvtt.f32.f16 s18, s16
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: vldr s20, .LCPI15_0
; CHECK-NEXT: vcvtb.f32.f16 s16, s16
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: vldr s20, .LCPI15_0
-; CHECK-NEXT: vldr s22, .LCPI15_1
-; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: vldr s22, .LCPI15_1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r4, #-1
-; CHECK-NEXT: movlt.w r5, #-2147483648
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt.w r4, #-2147483648
; CHECK-NEXT: vcmp.f32 s18, s22
+; CHECK-NEXT: csinv r5, r1, zr, lt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
-; CHECK-NEXT: mvngt r5, #-2147483648
-; CHECK-NEXT: movgt r4, #0
+; CHECK-NEXT: mvngt r4, #-2147483648
+; CHECK-NEXT: movgt r5, #0
; CHECK-NEXT: vcmp.f32 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs r4, #0
; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: movvs r4, #0
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
+; CHECK-NEXT: it lt
; CHECK-NEXT: movlt.w r0, #-2147483648
-; CHECK-NEXT: movlt.w r1, #-1
; CHECK-NEXT: vcmp.f32 s16, s22
+; CHECK-NEXT: csinv r1, r1, zr, lt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
; CHECK-NEXT: movgt r1, #0
@@ -1247,10 +1243,10 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) {
; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r4
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: .p2align 2
@@ -1748,125 +1744,122 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f32_v4i32_duplicate(<4 x float>
define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i50:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r9, r0
-; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vldr s22, .LCPI28_0
-; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: mov r9, r1
; CHECK-NEXT: vldr s20, .LCPI28_1
+; CHECK-NEXT: vmov r1, s18
; CHECK-NEXT: vcmp.f32 s17, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movwlt r11, #0
-; CHECK-NEXT: movtlt r11, #65534
+; CHECK-NEXT: movwlt r9, #0
+; CHECK-NEXT: movtlt r9, #65534
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r9, #-1
; CHECK-NEXT: vcmp.f32 s17, s17
; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r11, #65535
-; CHECK-NEXT: movtgt r11, #1
+; CHECK-NEXT: movwgt r9, #65535
+; CHECK-NEXT: movtgt r9, #1
+; CHECK-NEXT: csinv r8, r0, zr, gt
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
+; CHECK-NEXT: movvs.w r8, #0
; CHECK-NEXT: movvs.w r9, #0
-; CHECK-NEXT: movvs.w r11, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: movtlt r5, #65534
; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movtlt r7, #65534
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov r1, s19
; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r5, #65535
-; CHECK-NEXT: movtgt r5, #1
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r6, #-1
+; CHECK-NEXT: movwgt r7, #65535
+; CHECK-NEXT: movtgt r7, #1
; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: csinv r10, r0, zr, gt
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs r6, #0
-; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: movvs.w r10, #0
+; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vcmp.f32 s19, s22
-; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: movtlt r7, #65534
; CHECK-NEXT: vcmp.f32 s19, s20
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movtlt r5, #65534
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r4, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov r1, s16
; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r7, #65535
-; CHECK-NEXT: movtgt r7, #1
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r4, #-1
+; CHECK-NEXT: movwgt r5, #65535
+; CHECK-NEXT: movtgt r5, #1
; CHECK-NEXT: vcmp.f32 s19, s19
+; CHECK-NEXT: csinv r6, r0, zr, gt
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs r4, #0
-; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: movvs r6, #0
+; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: vcmp.f32 s16, s22
-; CHECK-NEXT: bfc r5, #18, #14
+; CHECK-NEXT: bfc r7, #18, #14
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: ittt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movtlt r1, #65534
-; CHECK-NEXT: vcmp.f32 s16, s20
-; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: ittt gt
+; CHECK-NEXT: vcmp.f32 s16, s16
+; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #1
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcmp.f32 s16, s16
-; CHECK-NEXT: lsrl r2, r5, #28
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: bfc r9, #18, #14
+; CHECK-NEXT: lsrl r2, r7, #28
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: str.w r0, [r8]
-; CHECK-NEXT: lsr.w r0, r7, #10
-; CHECK-NEXT: bfc r7, #18, #14
-; CHECK-NEXT: bfc r11, #18, #14
-; CHECK-NEXT: lsll r4, r7, #22
-; CHECK-NEXT: orr.w r3, r5, r7
-; CHECK-NEXT: str.w r3, [r8, #20]
-; CHECK-NEXT: orr.w r2, r2, r4
-; CHECK-NEXT: str.w r2, [r8, #16]
-; CHECK-NEXT: strb.w r0, [r8, #24]
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: lsrl r0, r11, #14
-; CHECK-NEXT: orr.w r2, r11, r6, lsl #4
-; CHECK-NEXT: strd r0, r2, [r8, #8]
+; CHECK-NEXT: str r0, [r4]
+; CHECK-NEXT: lsr.w r0, r5, #10
+; CHECK-NEXT: bfc r5, #18, #14
+; CHECK-NEXT: lsll r6, r5, #22
+; CHECK-NEXT: orr.w r3, r7, r5
+; CHECK-NEXT: str r3, [r4, #20]
+; CHECK-NEXT: orr.w r2, r2, r6
+; CHECK-NEXT: str r2, [r4, #16]
+; CHECK-NEXT: strb r0, [r4, #24]
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: lsrl r0, r9, #14
+; CHECK-NEXT: orr.w r2, r9, r10, lsl #4
+; CHECK-NEXT: strd r0, r2, [r4, #8]
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: bfc r1, #18, #14
-; CHECK-NEXT: orr.w r0, r1, r9, lsl #18
-; CHECK-NEXT: str.w r0, [r8, #4]
+; CHECK-NEXT: orr.w r0, r1, r8, lsl #18
+; CHECK-NEXT: str r0, [r4, #4]
; CHECK-NEXT: vpop {d8, d9, d10, d11}
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI28_0:
@@ -1880,98 +1873,93 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
define arm_aapcs_vfpcc <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i64:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r11, r0
-; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vldr s22, .LCPI29_0
-; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vldr s20, .LCPI29_1
-; CHECK-NEXT: vmov r9, s17
-; CHECK-NEXT: vcmp.f32 s19, s22
+; CHECK-NEXT: vmov r1, s19
+; CHECK-NEXT: vcmp.f32 s16, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r10, #-2147483648
-; CHECK-NEXT: movlt.w r11, #0
+; CHECK-NEXT: movlt.w r4, #-2147483648
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r4, #-2147483648
+; CHECK-NEXT: vcmp.f32 s16, s16
+; CHECK-NEXT: vmov r7, s17
+; CHECK-NEXT: vmov r8, s18
+; CHECK-NEXT: csinv r9, r0, zr, gt
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itt vs
+; CHECK-NEXT: movvs r4, #0
+; CHECK-NEXT: movvs.w r9, #0
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: vcmp.f32 s19, s22
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s19, s20
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt.w r5, #-2147483648
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r11, #-1
-; CHECK-NEXT: mvngt r10, #-2147483648
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r5, #-2147483648
; CHECK-NEXT: vcmp.f32 s19, s19
+; CHECK-NEXT: csinv r10, r0, zr, gt
+; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vmov r8, s16
; CHECK-NEXT: itt vs
+; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: movvs.w r10, #0
-; CHECK-NEXT: movvs.w r11, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: vcmp.f32 s17, s22
+; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: movlt.w r6, #-2147483648
-; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: movlt.w r7, #-2147483648
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: mvngt r6, #-2147483648
-; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r7, #-2147483648
+; CHECK-NEXT: vcmp.f32 s17, s17
+; CHECK-NEXT: csinv r6, r0, zr, gt
+; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vcmp.f32 s17, s22
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r4, #-2147483648
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: mvngt r4, #-2147483648
-; CHECK-NEXT: vcmp.f32 s17, s17
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs r4, #0
-; CHECK-NEXT: movvs r5, #0
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s16, s22
-; CHECK-NEXT: vmov q1[2], q1[0], r7, r11
+; CHECK-NEXT: vcmp.f32 s18, s22
+; CHECK-NEXT: vmov q0[2], q0[0], r9, r6
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt.w r1, #-2147483648
-; CHECK-NEXT: vcmp.f32 s16, s20
-; CHECK-NEXT: vmov q1[3], q1[1], r6, r10
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
+; CHECK-NEXT: it gt
; CHECK-NEXT: mvngt r1, #-2147483648
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcmp.f32 s16, s16
+; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: vmov q0[3], q0[1], r4, r7
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r4
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: vmov q1[2], q1[0], r0, r10
+; CHECK-NEXT: vmov q1[3], q1[1], r1, r5
; CHECK-NEXT: vpop {d8, d9, d10, d11}
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI29_0:
@@ -1997,25 +1985,27 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vmov r6, s17
; CHECK-NEXT: vldr s22, .LCPI30_0
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: vldr s20, .LCPI30_1
-; CHECK-NEXT: vmov r7, s19
+; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: vcmp.f32 s18, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #7
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: mov r5, r3
; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: vmov r7, s19
+; CHECK-NEXT: csinv r3, r6, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
@@ -2024,83 +2014,77 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: str.w r1, [r4, #29]
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: str.w r0, [r4, #25]
-; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: str.w r3, [r4, #25]
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: vcmp.f32 s17, s22
-; CHECK-NEXT: mov r5, r1
-; CHECK-NEXT: mov r6, r2
+; CHECK-NEXT: mov r6, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt lt
-; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: vcmp.f32 s17, s20
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: itttt lt
+; CHECK-NEXT: mvnlt r6, #7
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt r3, #7
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r6, #7
; CHECK-NEXT: vcmp.f32 s17, s17
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r10, r2, zr, gt
+; CHECK-NEXT: csinv r11, r1, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: movvs.w r10, #0
-; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs.w r11, #0
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vcmp.f32 s19, s22
-; CHECK-NEXT: mov r11, r1
-; CHECK-NEXT: mov r8, r2
-; CHECK-NEXT: mov r9, r3
+; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt lt
-; CHECK-NEXT: mvnlt r9, #7
-; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: movlt.w r11, #0
-; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: vcmp.f32 s19, s20
+; CHECK-NEXT: itttt lt
+; CHECK-NEXT: mvnlt r7, #7
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: movgt.w r11, #-1
-; CHECK-NEXT: movgt.w r8, #-1
-; CHECK-NEXT: movgt.w r9, #7
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r7, #7
; CHECK-NEXT: vcmp.f32 s19, s19
+; CHECK-NEXT: csinv r5, r0, zr, gt
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: csinv r9, r1, zr, gt
+; CHECK-NEXT: csinv r8, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt vs
-; CHECK-NEXT: movvs.w r9, #0
-; CHECK-NEXT: movvs.w r8, #0
-; CHECK-NEXT: movvs.w r11, #0
; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: movvs.w r8, #0
+; CHECK-NEXT: movvs.w r9, #0
+; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s16, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt r3, #7
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: vcmp.f32 s16, s16
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r3, #7
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
@@ -2111,31 +2095,31 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: lsrl r0, r11, #28
-; CHECK-NEXT: and r1, r9, #15
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: and r1, r7, #15
+; CHECK-NEXT: lsrl r0, r9, #28
; CHECK-NEXT: str.w r0, [r4, #41]
-; CHECK-NEXT: mov r0, r10
-; CHECK-NEXT: lsrl r0, r5, #28
+; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: lsrl r0, r11, #28
; CHECK-NEXT: str r0, [r4, #16]
-; CHECK-NEXT: orr.w r0, r11, r8, lsl #4
+; CHECK-NEXT: orr.w r0, r9, r8, lsl #4
; CHECK-NEXT: lsrl r8, r1, #28
; CHECK-NEXT: str.w r0, [r4, #45]
+; CHECK-NEXT: orr.w r0, r11, r10, lsl #4
+; CHECK-NEXT: str r0, [r4, #20]
; CHECK-NEXT: strb.w r8, [r4, #49]
+; CHECK-NEXT: and r1, r6, #15
; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: lsrl r10, r1, #28
; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r7, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r5, lsl #4
; CHECK-NEXT: str.w r0, [r4, #37]
-; CHECK-NEXT: orr.w r0, r5, r6, lsl #4
-; CHECK-NEXT: str r0, [r4, #20]
-; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT: and r1, r0, #15
-; CHECK-NEXT: lsrl r6, r1, #28
-; CHECK-NEXT: strb r6, [r4, #24]
+; CHECK-NEXT: strb.w r10, [r4, #24]
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: and r0, r3, #15
-; CHECK-NEXT: orr.w r0, r0, r10, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r2, lsl #4
; CHECK-NEXT: str r0, [r4, #12]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9, d10, d11}
@@ -2154,122 +2138,141 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
define arm_aapcs_vfpcc <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i128:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vmov r5, s18
; CHECK-NEXT: vldr s22, .LCPI31_0
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vldr s20, .LCPI31_1
-; CHECK-NEXT: add.w r12, r4, #48
-; CHECK-NEXT: vcmp.f32 s19, s22
+; CHECK-NEXT: mov r11, r3
+; CHECK-NEXT: vcmp.f32 s17, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r3, #-2147483648
-; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt.w r11, #-2147483648
+; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s19, s20
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s19, s19
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r11, #-2147483648
+; CHECK-NEXT: vcmp.f32 s17, s17
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: csinv r10, r7, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: movvs.w r11, #0
+; CHECK-NEXT: movvs.w r10, #0
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: vmov r7, s16
-; CHECK-NEXT: vmov r6, s17
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: add.w r12, r4, #32
+; CHECK-NEXT: mov r6, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r3, #-2147483648
-; CHECK-NEXT: movlt r2, #0
-; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt.w r6, #-2147483648
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: mvngt r3, #-2147483648
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r6, #-2147483648
; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: csinv r9, r0, zr, gt
+; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: csinv r5, r2, zr, gt
+; CHECK-NEXT: csinv r8, r1, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt vs
-; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: movvs r6, #0
+; CHECK-NEXT: movvs.w r9, #0
+; CHECK-NEXT: movvs.w r8, #0
+; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s17, s22
-; CHECK-NEXT: add.w r12, r4, #16
+; CHECK-NEXT: vcmp.f32 s19, s22
+; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s19, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
+; CHECK-NEXT: movlt.w r7, #-2147483648
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s17, s17
+; CHECK-NEXT: vcmp.f32 s19, s19
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r7, #-2147483648
+; CHECK-NEXT: add.w r3, r4, #48
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt vs
-; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: stm r3!, {r0, r1, r2}
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: strd r9, r8, [r4, #32]
+; CHECK-NEXT: str r5, [r4, #40]
+; CHECK-NEXT: str.w r10, [r4, #16]
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: str r1, [r4, #20]
+; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
+; CHECK-NEXT: str r1, [r4, #24]
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s16, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s16, s20
+; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: it gt
; CHECK-NEXT: mvngt r3, #-2147483648
; CHECK-NEXT: vcmp.f32 s16, s16
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt vs
-; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: str r2, [r4, #8]
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: str r1, [r4, #4]
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm r4!, {r0, r1, r2, r3}
+; CHECK-NEXT: str r0, [r4]
+; CHECK-NEXT: str r7, [r4, #60]
+; CHECK-NEXT: str r6, [r4, #44]
+; CHECK-NEXT: str.w r11, [r4, #28]
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: str r3, [r4, #12]
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI31_0:
@@ -2303,66 +2306,62 @@ define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI32_0
-; CHECK-NEXT: vmov r8, r7, d8
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov r8, r4, d8
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: mov r2, r11
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2iz
; CHECK-NEXT: vldr d0, .LCPI32_1
-; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: csinv r7, r0, zr, eq
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: vmov r10, r3, d0
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: vmov r6, r5, d9
-; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r9, #-1
-; CHECK-NEXT: mov r10, r3
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r9, r3
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #0
+; CHECK-NEXT: movne r7, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #0
-; CHECK-NEXT: and r0, r9, #1
-; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: and r0, r7, #1
+; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
; CHECK-NEXT: rsbs r0, r0, #0
-; CHECK-NEXT: movs r4, #0
-; CHECK-NEXT: bfi r4, r0, #0, #1
+; CHECK-NEXT: movs r7, #0
+; CHECK-NEXT: bfi r7, r0, #0, #1
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r11
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2iz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r7, #-1
-; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: csinv r4, r0, zr, eq
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r6
@@ -2370,17 +2369,17 @@ define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: and r0, r7, #1
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: and r0, r4, #1
; CHECK-NEXT: rsbs r0, r0, #0
-; CHECK-NEXT: bfi r4, r0, #1, #1
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: strb r4, [r0]
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: bfi r7, r0, #1, #1
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: strb r7, [r0]
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2405,82 +2404,80 @@ define arm_aapcs_vfpcc <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI33_0
-; CHECK-NEXT: vmov r9, r8, d9
-; CHECK-NEXT: vmov r7, r3, d0
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: vmov r8, r7, d9
+; CHECK-NEXT: vmov r11, r5, d0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vldr d0, .LCPI33_1
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: csinv r9, r1, zr, eq
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: vmov r11, r10, d8
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: moveq.w r4, #-1
-; CHECK-NEXT: mvneq r5, #127
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: vmov r6, r10, d8
+; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill
+; CHECK-NEXT: it eq
+; CHECK-NEXT: mvneq r4, #127
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r5, #127
-; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: movne r4, #127
+; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: itt eq
+; CHECK-NEXT: it eq
; CHECK-NEXT: mvneq r7, #127
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT: csinv r5, r1, zr, eq
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: movne r5, #0
; CHECK-NEXT: movne r7, #127
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne r5, #0
; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vmov q0[2], q0[0], r7, r4
+; CHECK-NEXT: vmov q0[3], q0[1], r5, r9
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2505,84 +2502,82 @@ define arm_aapcs_vfpcc <2 x i13> @test_signed_v2f64_v2i13(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI34_0
-; CHECK-NEXT: vmov r9, r8, d9
-; CHECK-NEXT: vmov r7, r3, d0
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: vmov r8, r7, d9
+; CHECK-NEXT: vmov r11, r5, d0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vldr d0, .LCPI34_1
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: csinv r9, r1, zr, eq
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: vmov r11, r10, d8
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: ittt eq
-; CHECK-NEXT: movweq r5, #61440
-; CHECK-NEXT: movteq r5, #65535
-; CHECK-NEXT: moveq.w r4, #-1
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: vmov r6, r10, d8
+; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill
+; CHECK-NEXT: itt eq
+; CHECK-NEXT: movweq r4, #61440
+; CHECK-NEXT: movteq r4, #65535
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movwne r5, #4095
-; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: movwne r4, #4095
+; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: ittt eq
+; CHECK-NEXT: itt eq
; CHECK-NEXT: movweq r7, #61440
; CHECK-NEXT: movteq r7, #65535
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT: csinv r5, r1, zr, eq
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: movne r5, #0
; CHECK-NEXT: movwne r7, #4095
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: movne r5, #0
; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vmov q0[2], q0[0], r7, r4
+; CHECK-NEXT: vmov q0[3], q0[1], r5, r9
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2607,84 +2602,82 @@ define arm_aapcs_vfpcc <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI35_0
-; CHECK-NEXT: vmov r9, r8, d9
-; CHECK-NEXT: vmov r7, r3, d0
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: vmov r8, r7, d9
+; CHECK-NEXT: vmov r11, r5, d0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vldr d0, .LCPI35_1
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: csinv r9, r1, zr, eq
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: vmov r11, r10, d8
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: ittt eq
-; CHECK-NEXT: movweq r5, #32768
-; CHECK-NEXT: movteq r5, #65535
-; CHECK-NEXT: moveq.w r4, #-1
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: vmov r6, r10, d8
+; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill
+; CHECK-NEXT: itt eq
+; CHECK-NEXT: movweq r4, #32768
+; CHECK-NEXT: movteq r4, #65535
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movwne r5, #32767
-; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: movwne r4, #32767
+; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: ittt eq
+; CHECK-NEXT: itt eq
; CHECK-NEXT: movweq r7, #32768
; CHECK-NEXT: movteq r7, #65535
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT: csinv r5, r1, zr, eq
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: movne r5, #0
; CHECK-NEXT: movwne r7, #32767
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: movne r5, #0
; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vmov q0[2], q0[0], r7, r4
+; CHECK-NEXT: vmov q0[3], q0[1], r5, r9
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2709,94 +2702,92 @@ define arm_aapcs_vfpcc <2 x i19> @test_signed_v2f64_v2i19(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #24
-; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI36_0
-; CHECK-NEXT: vmov r5, r4, d9
-; CHECK-NEXT: vmov r7, r6, d0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: vmov r9, r4, d9
+; CHECK-NEXT: vmov r6, r10, d0
+; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: strd r5, r4, [sp, #12] @ 8-byte Folded Spill
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vldr d0, .LCPI36_1
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: vmov r11, r5, d8
-; CHECK-NEXT: mov r9, r1
-; CHECK-NEXT: vmov r10, r0, d0
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: ittt eq
-; CHECK-NEXT: movweq r8, #0
-; CHECK-NEXT: movteq r8, #65532
-; CHECK-NEXT: moveq.w r9, #-1
-; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: vmov r8, r11, d8
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov r0, r7, d0
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: strd r7, r0, [sp] @ 8-byte Folded Spill
+; CHECK-NEXT: csinv r0, r1, zr, eq
+; CHECK-NEXT: itt eq
+; CHECK-NEXT: moveq r4, #0
+; CHECK-NEXT: movteq r4, #65532
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: ittt eq
-; CHECK-NEXT: moveq r7, #0
-; CHECK-NEXT: movteq r7, #65532
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: itt eq
+; CHECK-NEXT: moveq r6, #0
+; CHECK-NEXT: movteq r6, #65532
+; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: csinv r10, r1, zr, eq
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: movwne r7, #65535
-; CHECK-NEXT: movtne r7, #3
-; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: movwne r6, #65535
+; CHECK-NEXT: movtne r6, #3
+; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movne.w r9, #0
-; CHECK-NEXT: movwne r8, #65535
-; CHECK-NEXT: movtne r8, #3
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: movwne r4, #65535
+; CHECK-NEXT: movtne r4, #3
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r9, #0
-; CHECK-NEXT: movne.w r8, #0
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r1
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne.w r10, #0
; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r8
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r9
-; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: vmov q0[2], q0[0], r6, r4
+; CHECK-NEXT: vmov q0[3], q0[1], r10, r5
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2821,82 +2812,80 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32_duplicate(<2 x double>
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI37_0
-; CHECK-NEXT: vmov r9, r8, d9
-; CHECK-NEXT: vmov r7, r3, d0
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: vmov r8, r7, d9
+; CHECK-NEXT: vmov r11, r5, d0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vldr d0, .LCPI37_1
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: csinv r9, r1, zr, eq
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: vmov r11, r10, d8
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: moveq.w r4, #-1
-; CHECK-NEXT: moveq.w r5, #-2147483648
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: vmov r6, r10, d8
+; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq.w r4, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: itt ne
-; CHECK-NEXT: mvnne r5, #-2147483648
-; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: mvnne r4, #-2147483648
+; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: itt eq
+; CHECK-NEXT: it eq
; CHECK-NEXT: moveq.w r7, #-2147483648
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT: csinv r5, r1, zr, eq
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: movne r5, #0
; CHECK-NEXT: mvnne r7, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne r5, #0
; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vmov q0[2], q0[0], r7, r4
+; CHECK-NEXT: vmov q0[3], q0[1], r5, r9
+; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2925,67 +2914,72 @@ define arm_aapcs_vfpcc <2 x i50> @test_signed_v2f64_v2i50(<2 x double> %f) {
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI38_0
-; CHECK-NEXT: vmov r5, r4, d9
+; CHECK-NEXT: vmov r9, r4, d9
; CHECK-NEXT: vmov r6, r7, d0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: strd r4, r5, [sp, #8] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r11, r0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vmov r5, r10, d8
; CHECK-NEXT: vldr d0, .LCPI38_1
-; CHECK-NEXT: cmp.w r11, #0
; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: vmov r9, r8, d0
-; CHECK-NEXT: csel r11, r0, r11, ne
+; CHECK-NEXT: cmp.w r11, #0
+; CHECK-NEXT: vmov r8, r1, d0
+; CHECK-NEXT: csel r0, r0, r11, ne
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: str r1, [sp] @ 4-byte Spill
; CHECK-NEXT: itt eq
; CHECK-NEXT: moveq r4, #0
; CHECK-NEXT: movteq r4, #65534
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: csel r7, r0, r7, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r11, r0, r7, ne
; CHECK-NEXT: itt eq
; CHECK-NEXT: moveq r6, #0
; CHECK-NEXT: movteq r6, #65534
+; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movne.w r7, #-1
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: csinv r0, r11, zr, ne
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: itt ne
; CHECK-NEXT: movwne r6, #65535
; CHECK-NEXT: movtne r6, #1
-; CHECK-NEXT: ldrd r9, r0, [sp, #8] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: ldr.w r11, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movne.w r11, #-1
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: csinv r7, r0, zr, ne
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: itt ne
; CHECK-NEXT: movwne r4, #65535
; CHECK-NEXT: movtne r4, #1
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r5
@@ -2993,13 +2987,14 @@ define arm_aapcs_vfpcc <2 x i50> @test_signed_v2f64_v2i50(<2 x double> %f) {
; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: movne r7, #0
; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r11
+; CHECK-NEXT: movne r0, #0
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r7
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r6, #0
; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
@@ -3028,16 +3023,16 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI39_0
; CHECK-NEXT: vmov r8, r7, d9
-; CHECK-NEXT: vmov r11, r5, d0
+; CHECK-NEXT: vmov r11, r3, d0
+; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: mov r0, r8
@@ -3046,12 +3041,12 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) {
; CHECK-NEXT: vldr d0, .LCPI39_1
; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: csel r9, r0, r9, ne
+; CHECK-NEXT: csel r5, r0, r9, ne
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: vmov r6, r10, d8
-; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill
+; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq.w r4, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpgt
@@ -3060,18 +3055,18 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) {
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r9, #-1
+; CHECK-NEXT: it ne
; CHECK-NEXT: mvnne r4, #-2147483648
+; CHECK-NEXT: csinv r9, r5, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne.w r9, #0
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r6
@@ -3081,7 +3076,7 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) {
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq.w r7, #-2147483648
-; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
; CHECK-NEXT: csel r5, r0, r5, ne
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r10
@@ -3091,9 +3086,9 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) {
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: itt ne
+; CHECK-NEXT: it ne
; CHECK-NEXT: mvnne r7, #-2147483648
-; CHECK-NEXT: movne.w r5, #-1
+; CHECK-NEXT: csinv r5, r5, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: itt ne
@@ -3101,7 +3096,7 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) {
; CHECK-NEXT: movne r7, #0
; CHECK-NEXT: vmov q0[2], q0[0], r5, r9
; CHECK-NEXT: vmov q0[3], q0[1], r7, r4
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -3126,105 +3121,108 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #48
-; CHECK-NEXT: sub sp, #48
+; CHECK-NEXT: .pad #40
+; CHECK-NEXT: sub sp, #40
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI40_0
-; CHECK-NEXT: vmov r7, r6, d8
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: vmov r10, r9, d0
+; CHECK-NEXT: vmov r7, r5, d8
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: vmov r9, r8, d0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: vldr d0, .LCPI40_1
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill
+; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: csel r4, r2, r4, ne
-; CHECK-NEXT: vmov r5, r11, d0
-; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT: vmov r6, r10, d0
+; CHECK-NEXT: strd r0, r3, [sp, #20] @ 8-byte Folded Spill
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: str r6, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: csinv r4, r4, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str.w r8, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT: str.w r4, [r8, #8]
-; CHECK-NEXT: str.w r9, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: str.w r11, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: mov r6, r11
+; CHECK-NEXT: str.w r4, [r11, #8]
+; CHECK-NEXT: mov r11, r9
+; CHECK-NEXT: str.w r9, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: mov r9, r8
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r8, [sp, #32] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: csel r4, r1, r0, ne
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: str.w r11, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: csinv r4, r4, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str.w r4, [r8, #4]
+; CHECK-NEXT: str r4, [r6, #4]
+; CHECK-NEXT: mov r6, r9
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: csel r4, r1, r0, ne
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r11, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r7, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: csinv r4, r4, zr, ne
+; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: str r5, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: vmov r9, r8, d9
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r6
; CHECK-NEXT: str r4, [r0]
-; CHECK-NEXT: ldr.w r11, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: ldr r5, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: bl __aeabi_dcmpge
@@ -3233,101 +3231,99 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: strd r2, r3, [sp, #12] @ 8-byte Folded Spill
-; CHECK-NEXT: csel r7, r1, r4, ne
-; CHECK-NEXT: mov r4, r5
-; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: ldr r5, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: csel r4, r1, r4, ne
+; CHECK-NEXT: mov r2, r11
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r11, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r7, #-1
+; CHECK-NEXT: csinv r4, r4, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r6
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: mov r7, r4
+; CHECK-NEXT: mov r10, r5
; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: csel r6, r6, r0, ne
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: csel r4, r1, r0, ne
; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
+; CHECK-NEXT: csinv r4, r4, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r0, #0
-; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: lsrl r0, r7, #28
; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r3, r6
; CHECK-NEXT: str r0, [r1, #16]
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: mov r6, r11
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr.w r11, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: csel r4, r1, r0, ne
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r5, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: csinv r4, r4, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: ldr.w r11, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: orr.w r0, r7, r4, lsl #4
-; CHECK-NEXT: ldr r7, [sp, #44] @ 4-byte Reload
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: mov r2, r10
; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: str r0, [r7, #20]
+; CHECK-NEXT: str.w r0, [r11, #20]
; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: str r6, [sp] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: it eq
; CHECK-NEXT: mvneq r6, #7
+; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: mov r10, r5
+; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r9
@@ -3343,28 +3339,28 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: movne r0, #0
; CHECK-NEXT: and r1, r0, #15
; CHECK-NEXT: lsrl r4, r1, #28
-; CHECK-NEXT: strb r4, [r7, #24]
-; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: strb.w r4, [r11, #24]
+; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r5, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: ldr r4, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: it eq
; CHECK-NEXT: mvneq r4, #7
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #7
; CHECK-NEXT: bl __aeabi_dcmpun
@@ -3375,8 +3371,8 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: and r0, r0, #15
; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT: str r0, [r7, #12]
-; CHECK-NEXT: add sp, #48
+; CHECK-NEXT: str.w r0, [r11, #12]
+; CHECK-NEXT: add sp, #40
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -3401,243 +3397,246 @@ define arm_aapcs_vfpcc <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #32
-; CHECK-NEXT: sub sp, #32
+; CHECK-NEXT: .pad #48
+; CHECK-NEXT: sub sp, #48
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI41_0
-; CHECK-NEXT: vmov r8, r7, d9
-; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: vmov r6, r5, d9
+; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: vldr d0, .LCPI41_1
-; CHECK-NEXT: mov r5, r3
-; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: vmov r10, r11, d0
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r5, #-2147483648
-; CHECK-NEXT: str.w r11, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r5, #-2147483648
-; CHECK-NEXT: bl __aeabi_dcmpun
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: str.w r5, [r9, #28]
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: str r4, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: csel r5, r1, r0, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: str.w r10, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r5, #-1
-; CHECK-NEXT: bl __aeabi_dcmpun
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: str.w r5, [r9, #24]
-; CHECK-NEXT: mov r11, r6
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: bl __aeabi_dcmpun
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str.w r4, [r9, #20]
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: vmov r6, r5, d8
-; CHECK-NEXT: mov r10, r9
-; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r9, r11
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr.w r11, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: bl __aeabi_dcmpun
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str.w r4, [r10, #16]
+; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r11, r3
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: mov r9, r3
-; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT: vldr d0, .LCPI41_1
; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r9, #-2147483648
-; CHECK-NEXT: ldr.w r10, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: mov r4, r2
+; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: csel r7, r2, r7, ne
+; CHECK-NEXT: vmov r9, r10, d0
+; CHECK-NEXT: strd r0, r3, [sp, #24] @ 8-byte Folded Spill
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: str.w r9, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: str.w r10, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r9, #-2147483648
+; CHECK-NEXT: csinv r7, r7, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #0
-; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str.w r7, [r8, #24]
+; CHECK-NEXT: mov r4, r8
+; CHECK-NEXT: ldr.w r8, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: str.w r11, [sp, #40] @ 4-byte Spill
; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: str.w r9, [r7, #12]
-; CHECK-NEXT: ldr.w r9, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r4, r4, r0, ne
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: csel r7, r1, r0, ne
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: csinv r7, r7, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str r4, [r7, #8]
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str r7, [r4, #20]
+; CHECK-NEXT: mov r9, r4
+; CHECK-NEXT: mov r10, r8
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: csel r7, r1, r0, ne
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: csinv r7, r7, zr, ne
+; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: vmov r11, r8, d8
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str r4, [r7, #4]
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str.w r7, [r9, #16]
+; CHECK-NEXT: ldr r3, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: ldr.w r10, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: strd r1, r0, [sp, #12] @ 8-byte Folded Spill
+; CHECK-NEXT: csel r7, r2, r7, ne
+; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: mov r5, r4
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: csinv r7, r7, zr, ne
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str.w r7, [r9, #8]
+; CHECK-NEXT: mov r6, r9
+; CHECK-NEXT: ldr.w r9, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: csel r7, r1, r0, ne
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: csinv r7, r7, zr, ne
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str r6, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: str r7, [r6, #4]
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr.w r9, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r5, r10
+; CHECK-NEXT: csel r7, r1, r0, ne
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: csinv r7, r7, zr, ne
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str r7, [r6]
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r10, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldr r4, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq.w r4, #-2147483648
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: mvnne r4, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str r4, [r7]
-; CHECK-NEXT: add sp, #32
+; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: str r4, [r5, #28]
+; CHECK-NEXT: ldr r3, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r4, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq.w r4, #-2147483648
+; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: mvnne r4, #-2147483648
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #0
+; CHECK-NEXT: str r0, [r5, #12]
+; CHECK-NEXT: add sp, #48
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -4288,101 +4287,98 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) {
; CHECK-NEXT: vcvtt.f32.f16 s24, s16
; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcvtb.f32.f16 s26, s17
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: vmov r0, s26
; CHECK-NEXT: vldr s22, .LCPI48_0
+; CHECK-NEXT: vcvtb.f32.f16 s26, s17
; CHECK-NEXT: vldr s20, .LCPI48_1
+; CHECK-NEXT: vmov r2, s26
; CHECK-NEXT: vcmp.f32 s24, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movtlt r1, #65534
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r2, #0
-; CHECK-NEXT: vcmp.f32 s24, s20
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: vcmp.f32 s24, s24
; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #1
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcvtt.f32.f16 s24, s17
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: vcmp.f32 s26, s22
+; CHECK-NEXT: vcvtt.f32.f16 s24, s17
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movtlt r1, #65534
-; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov r2, s24
+; CHECK-NEXT: vcmp.f32 s26, s26
; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #1
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: vcmp.f32 s26, s26
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcvtb.f32.f16 s26, s18
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: vmov r0, s26
; CHECK-NEXT: vcmp.f32 s24, s22
+; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movtlt r1, #65534
; CHECK-NEXT: vcmp.f32 s24, s20
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movtlt r5, #65534
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcvtb.f32.f16 s26, s18
+; CHECK-NEXT: vmov r1, s26
; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r1, #65535
-; CHECK-NEXT: movtgt r1, #1
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: movwgt r5, #65535
+; CHECK-NEXT: movtgt r5, #1
; CHECK-NEXT: vcmp.f32 s24, s24
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: str r1, [sp] @ 4-byte Spill
+; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: vcmp.f32 s26, s22
; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: ittt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: movtlt r6, #65534
-; CHECK-NEXT: vcmp.f32 s26, s20
-; CHECK-NEXT: vcvtt.f32.f16 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: ittt gt
+; CHECK-NEXT: vcmp.f32 s26, s26
+; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r6, #65535
; CHECK-NEXT: movtgt r6, #1
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcmp.f32 s26, s26
+; CHECK-NEXT: vcvtt.f32.f16 s18, s18
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
@@ -4392,124 +4388,122 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) {
; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r9, r1
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: ittt lt
-; CHECK-NEXT: movwlt r9, #0
-; CHECK-NEXT: movtlt r9, #65534
-; CHECK-NEXT: movlt.w r8, #0
+; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movtlt r7, #65534
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: itt gt
+; CHECK-NEXT: movwgt r7, #65535
+; CHECK-NEXT: movtgt r7, #1
; CHECK-NEXT: vcvtb.f32.f16 s18, s19
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r8, #-1
-; CHECK-NEXT: movwgt r9, #65535
-; CHECK-NEXT: movtgt r9, #1
-; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs.w r8, #0
-; CHECK-NEXT: movvs.w r9, #0
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: mov r9, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: ittt lt
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: movtlt r5, #65534
-; CHECK-NEXT: movlt.w r11, #0
+; CHECK-NEXT: movwlt r9, #0
+; CHECK-NEXT: movtlt r9, #65534
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s18, s18
; CHECK-NEXT: vcvtt.f32.f16 s18, s19
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r11, #-1
-; CHECK-NEXT: movwgt r5, #65535
-; CHECK-NEXT: movtgt r5, #1
+; CHECK-NEXT: itt gt
+; CHECK-NEXT: movwgt r9, #65535
+; CHECK-NEXT: movtgt r9, #1
+; CHECK-NEXT: csinv r8, r0, zr, gt
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs.w r11, #0
-; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: movvs.w r8, #0
+; CHECK-NEXT: movvs.w r9, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcvtb.f32.f16 s16, s16
-; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: vcmp.f32 s18, s22
+; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: ittt lt
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: movtlt r7, #65534
-; CHECK-NEXT: movlt.w r10, #0
; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: ittt lt
+; CHECK-NEXT: movwlt r11, #0
+; CHECK-NEXT: movtlt r11, #65534
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movwgt r7, #65535
-; CHECK-NEXT: movtgt r7, #1
+; CHECK-NEXT: vcvtb.f32.f16 s16, s16
+; CHECK-NEXT: itt gt
+; CHECK-NEXT: movwgt r11, #65535
+; CHECK-NEXT: movtgt r11, #1
; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: csinv r10, r0, zr, gt
+; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
; CHECK-NEXT: movvs.w r10, #0
-; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: movvs.w r11, #0
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: vcmp.f32 s16, s22
-; CHECK-NEXT: bfc r5, #18, #14
+; CHECK-NEXT: bfc r9, #18, #14
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: ittt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movtlt r1, #65534
-; CHECK-NEXT: vcmp.f32 s16, s20
-; CHECK-NEXT: mov r2, r11
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: ittt gt
+; CHECK-NEXT: vcmp.f32 s16, s16
+; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #1
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcmp.f32 s16, s16
-; CHECK-NEXT: lsrl r2, r5, #28
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: bfc r7, #18, #14
+; CHECK-NEXT: lsrl r2, r9, #28
+; CHECK-NEXT: bfc r6, #18, #14
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: lsr.w r0, r7, #10
-; CHECK-NEXT: bfc r7, #18, #14
-; CHECK-NEXT: bfc r9, #18, #14
-; CHECK-NEXT: lsll r10, r7, #22
-; CHECK-NEXT: bfc r6, #18, #14
-; CHECK-NEXT: orr.w r3, r5, r7
+; CHECK-NEXT: lsr.w r0, r11, #10
+; CHECK-NEXT: bfc r11, #18, #14
+; CHECK-NEXT: lsll r10, r11, #22
+; CHECK-NEXT: orr.w r3, r9, r11
; CHECK-NEXT: str.w r3, [r4, #45]
; CHECK-NEXT: orr.w r2, r2, r10
; CHECK-NEXT: str.w r2, [r4, #41]
; CHECK-NEXT: strb.w r0, [r4, #49]
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: lsrl r0, r9, #14
-; CHECK-NEXT: orr.w r2, r9, r11, lsl #4
+; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: lsrl r0, r7, #14
+; CHECK-NEXT: orr.w r2, r7, r8, lsl #4
; CHECK-NEXT: str.w r2, [r4, #37]
; CHECK-NEXT: str.w r0, [r4, #33]
-; CHECK-NEXT: orr.w r0, r6, r8, lsl #18
+; CHECK-NEXT: orr.w r0, r6, r3, lsl #18
; CHECK-NEXT: str.w r0, [r4, #29]
-; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: lsr.w r7, r5, #10
+; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: bfc r5, #18, #14
; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: lsr.w r5, r3, #10
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: bfc r3, #18, #14
-; CHECK-NEXT: lsll r0, r3, #22
-; CHECK-NEXT: mov r7, r3
-; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: bfc r3, #18, #14
+; CHECK-NEXT: lsll r0, r5, #22
; CHECK-NEXT: lsrl r2, r3, #28
-; CHECK-NEXT: orr.w r3, r3, r7
+; CHECK-NEXT: orr.w r3, r3, r5
; CHECK-NEXT: str r3, [r4, #20]
; CHECK-NEXT: orr.w r2, r2, r0
; CHECK-NEXT: str r2, [r4, #16]
-; CHECK-NEXT: strb r5, [r4, #24]
+; CHECK-NEXT: strb r7, [r4, #24]
; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: bfc r3, #18, #14
@@ -4546,176 +4540,175 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vcvtt.f32.f16 s24, s19
-; CHECK-NEXT: vmov r0, s24
+; CHECK-NEXT: vcvtt.f32.f16 s22, s16
+; CHECK-NEXT: vmov r0, s22
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcvtb.f32.f16 s26, s19
-; CHECK-NEXT: mov r9, r0
-; CHECK-NEXT: vmov r0, s26
; CHECK-NEXT: vldr s28, .LCPI49_0
-; CHECK-NEXT: vldr s30, .LCPI49_1
+; CHECK-NEXT: vcvtb.f32.f16 s20, s18
+; CHECK-NEXT: vcvtt.f32.f16 s24, s18
+; CHECK-NEXT: vldr s18, .LCPI49_1
+; CHECK-NEXT: vcmp.f32 s22, s28
; CHECK-NEXT: mov r8, r1
-; CHECK-NEXT: vcmp.f32 s24, s28
-; CHECK-NEXT: vcvtt.f32.f16 s20, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcvtt.f32.f16 s26, s19
+; CHECK-NEXT: vcmp.f32 s22, s18
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt.w r8, #-2147483648
-; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: vcmp.f32 s24, s30
-; CHECK-NEXT: vcvtt.f32.f16 s22, s18
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r9, #-1
+; CHECK-NEXT: vmov r1, s26
+; CHECK-NEXT: it gt
; CHECK-NEXT: mvngt r8, #-2147483648
-; CHECK-NEXT: vcmp.f32 s24, s24
+; CHECK-NEXT: vcmp.f32 s22, s22
+; CHECK-NEXT: vmov r4, s20
+; CHECK-NEXT: vmov r5, s24
+; CHECK-NEXT: csinv r7, r0, zr, gt
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vmov r6, s20
-; CHECK-NEXT: vmov r4, s22
; CHECK-NEXT: itt vs
; CHECK-NEXT: movvs.w r8, #0
-; CHECK-NEXT: movvs.w r9, #0
+; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: vcmp.f32 s26, s28
-; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: mov r9, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s26, s18
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: movlt.w r11, #-2147483648
-; CHECK-NEXT: vcmp.f32 s26, s30
+; CHECK-NEXT: movlt.w r9, #-2147483648
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: mvngt r11, #-2147483648
-; CHECK-NEXT: movgt.w r10, #-1
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r9, #-2147483648
; CHECK-NEXT: vcmp.f32 s26, s26
+; CHECK-NEXT: csinv r6, r0, zr, gt
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs.w r10, #0
-; CHECK-NEXT: movvs.w r11, #0
+; CHECK-NEXT: movvs.w r9, #0
+; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vcmp.f32 s22, s28
-; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vcmp.f32 s24, s28
+; CHECK-NEXT: mov r10, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s18
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r4, #-2147483648
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: vcmp.f32 s22, s30
+; CHECK-NEXT: movlt.w r10, #-2147483648
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: mvngt r4, #-2147483648
-; CHECK-NEXT: vcmp.f32 s22, s22
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r10, #-2147483648
+; CHECK-NEXT: vcmp.f32 s24, s24
+; CHECK-NEXT: csinv r5, r0, zr, gt
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs r4, #0
+; CHECK-NEXT: movvs.w r10, #0
; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcvtb.f32.f16 s16, s16
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: vcmp.f32 s20, s28
+; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s20, s18
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r6, #-2147483648
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: vcmp.f32 s20, s30
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt.w r11, #-2147483648
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: mvngt r6, #-2147483648
+; CHECK-NEXT: vcvtb.f32.f16 s24, s19
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r11, #-2147483648
; CHECK-NEXT: vcmp.f32 s20, s20
+; CHECK-NEXT: csinv r1, r0, zr, gt
+; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs r6, #0
-; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: movvs.w r11, #0
+; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: vmov q5[2], q5[0], r1, r5
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s16, s28
+; CHECK-NEXT: vcmp.f32 s24, s28
+; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s18
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: movlt.w r1, #-2147483648
-; CHECK-NEXT: vcmp.f32 s16, s30
+; CHECK-NEXT: movlt.w r4, #-2147483648
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s16, s16
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: mvngt r1, #-2147483648
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: vcvtb.f32.f16 s16, s16
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r4, #-2147483648
+; CHECK-NEXT: vcmp.f32 s24, s24
+; CHECK-NEXT: vmov.f32 s19, s28
+; CHECK-NEXT: csinv r1, r0, zr, gt
+; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: itt vs
+; CHECK-NEXT: movvs r4, #0
+; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: vmov q6[2], q6[0], r1, r6
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: vcmp.f32 s16, s19
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s18
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt.w r5, #-2147483648
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: vcvtt.f32.f16 s16, s17
-; CHECK-NEXT: vmov q5[2], q5[0], r0, r7
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r5, #-2147483648
+; CHECK-NEXT: csinv r1, r0, zr, gt
; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: it vs
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itt vs
+; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: vmov q5[3], q5[1], r1, r6
+; CHECK-NEXT: vmov q7[2], q7[0], r1, r7
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s16, s28
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s19
; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: vcmp.f32 s16, s30
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s18
; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt.w r6, #-2147483648
-; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: vcvtb.f32.f16 s16, s17
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r7, #-1
+; CHECK-NEXT: it gt
; CHECK-NEXT: mvngt r6, #-2147483648
+; CHECK-NEXT: csinv r7, r0, zr, gt
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s16, s28
+; CHECK-NEXT: vcmp.f32 s16, s19
+; CHECK-NEXT: vmov q7[3], q7[1], r5, r8
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s18
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt.w r1, #-2147483648
-; CHECK-NEXT: vcmp.f32 s16, s30
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s16, s16
-; CHECK-NEXT: itt gt
+; CHECK-NEXT: vmov q5[3], q5[1], r11, r10
+; CHECK-NEXT: vmov q6[3], q6[1], r4, r9
+; CHECK-NEXT: it gt
; CHECK-NEXT: mvngt r1, #-2147483648
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: vmov q0, q7
+; CHECK-NEXT: vmov q2, q5
+; CHECK-NEXT: vmov q3, q6
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: vcvtb.f32.f16 s16, s18
-; CHECK-NEXT: vmov q6[2], q6[0], r0, r7
-; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: vmov q1[2], q1[0], r0, r7
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: vmov q6[3], q6[1], r1, r6
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s16, s28
-; CHECK-NEXT: vmov q3[2], q3[0], r10, r9
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: movlt.w r1, #-2147483648
-; CHECK-NEXT: vcmp.f32 s16, s30
-; CHECK-NEXT: vmov q3[3], q3[1], r11, r8
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: mvngt r1, #-2147483648
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcmp.f32 s16, s16
-; CHECK-NEXT: vmov q0, q5
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: vmov q2[2], q2[0], r0, r5
-; CHECK-NEXT: vmov q1, q6
-; CHECK-NEXT: vmov q2[3], q2[1], r1, r4
+; CHECK-NEXT: vmov q1[3], q1[1], r1, r6
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -4738,88 +4731,84 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: .pad #48
-; CHECK-NEXT: sub sp, #48
+; CHECK-NEXT: .pad #56
+; CHECK-NEXT: sub sp, #56
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vcvtb.f32.f16 s24, s17
; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcvtb.f32.f16 s26, s18
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: vmov r0, s26
; CHECK-NEXT: vldr s22, .LCPI50_0
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vldr s20, .LCPI50_1
-; CHECK-NEXT: mov r9, r1
+; CHECK-NEXT: vcvtb.f32.f16 s26, s18
; CHECK-NEXT: vcmp.f32 s24, s22
-; CHECK-NEXT: mov r10, r2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: vcmp.f32 s24, s20
+; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movgt.w r9, #-1
-; CHECK-NEXT: movgt.w r8, #-1
+; CHECK-NEXT: vmov r0, s26
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #7
; CHECK-NEXT: vcmp.f32 s24, s24
+; CHECK-NEXT: csinv r8, r2, zr, gt
+; CHECK-NEXT: csinv r10, r1, zr, gt
+; CHECK-NEXT: csinv r9, r7, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #48] @ 4-byte Spill
; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs.w r8, #0
; CHECK-NEXT: movvs.w r9, #0
; CHECK-NEXT: movvs.w r10, #0
+; CHECK-NEXT: movvs.w r8, #0
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcvtb.f32.f16 s24, s19
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vmov r0, s24
-; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: vcmp.f32 s26, s22
-; CHECK-NEXT: mov r7, r2
+; CHECK-NEXT: vcvtb.f32.f16 s24, s19
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: itttt lt
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: vcmp.f32 s26, s20
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #7
; CHECK-NEXT: vcmp.f32 s26, s26
+; CHECK-NEXT: csinv r7, r0, zr, gt
+; CHECK-NEXT: vmov r0, s24
+; CHECK-NEXT: csinv r6, r2, zr, gt
+; CHECK-NEXT: csinv r5, r1, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #44] @ 4-byte Spill
; CHECK-NEXT: ittt vs
+; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: movvs r6, #0
-; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s24, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s24, s24
-; CHECK-NEXT: itttt gt
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #7
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: vcvtt.f32.f16 s24, s16
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
@@ -4829,154 +4818,150 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: str.w r1, [r4, #79]
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: vcvtt.f32.f16 s24, s16
; CHECK-NEXT: str.w r0, [r4, #75]
; CHECK-NEXT: vmov r0, s24
-; CHECK-NEXT: str.w r7, [r4, #58]
-; CHECK-NEXT: str.w r6, [r4, #54]
-; CHECK-NEXT: str.w r5, [r4, #50]
-; CHECK-NEXT: str.w r10, [r4, #33]
-; CHECK-NEXT: str.w r9, [r4, #29]
-; CHECK-NEXT: str.w r8, [r4, #25]
+; CHECK-NEXT: str.w r6, [r4, #58]
+; CHECK-NEXT: str.w r5, [r4, #54]
+; CHECK-NEXT: str.w r7, [r4, #50]
+; CHECK-NEXT: str.w r8, [r4, #33]
+; CHECK-NEXT: str.w r10, [r4, #29]
+; CHECK-NEXT: str.w r9, [r4, #25]
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s24, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: mvnlt r3, #7
; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: mvnlt r3, #7
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s24, s24
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #7
+; CHECK-NEXT: vcvtt.f32.f16 s24, s17
+; CHECK-NEXT: csinv r7, r1, zr, gt
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r1, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: vcvtt.f32.f16 s24, s17
-; CHECK-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: str r7, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s24, s22
; CHECK-NEXT: vcvtt.f32.f16 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: mvnlt r3, #7
; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: mvnlt r3, #7
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #7
; CHECK-NEXT: vcmp.f32 s24, s24
+; CHECK-NEXT: csinv r7, r1, zr, gt
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r1, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: str r7, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r11, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: mov r8, r2
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: mvnlt r11, #7
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt.w r11, #7
; CHECK-NEXT: vcvtt.f32.f16 s18, s19
-; CHECK-NEXT: vmov r0, s18
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: movgt.w r8, #-1
-; CHECK-NEXT: movgt r3, #7
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r10, r2, zr, gt
+; CHECK-NEXT: csinv r7, r1, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs.w r8, #0
-; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: movvs.w r11, #0
+; CHECK-NEXT: movvs.w r10, #0
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcvtb.f32.f16 s16, s16
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: mov r9, r1
; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: mov r10, r2
-; CHECK-NEXT: mov r11, r3
+; CHECK-NEXT: mov r5, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt lt
-; CHECK-NEXT: mvnlt r11, #7
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: itttt lt
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: mvnlt r5, #7
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt.w r9, #-1
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movgt.w r11, #7
+; CHECK-NEXT: vcvtb.f32.f16 s16, s16
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r5, #7
; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: csinv r8, r0, zr, gt
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: csinv r9, r1, zr, gt
+; CHECK-NEXT: csinv r6, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt vs
-; CHECK-NEXT: movvs.w r11, #0
-; CHECK-NEXT: movvs.w r10, #0
-; CHECK-NEXT: movvs.w r9, #0
+; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: movvs r6, #0
+; CHECK-NEXT: movvs.w r9, #0
+; CHECK-NEXT: movvs.w r8, #0
+; CHECK-NEXT: str.w r8, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s16, s22
; CHECK-NEXT: mov r12, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: mvnlt r12, #7
-; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r12, #7
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: vcmp.f32 s16, s16
+; CHECK-NEXT: lsrl r8, r9, #28
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt.w r12, #7
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
@@ -4987,66 +4972,66 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: lsrl r0, r9, #28
-; CHECK-NEXT: str.w r0, [r4, #91]
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: str.w r8, [r4, #91]
+; CHECK-NEXT: ldr.w lr, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: mov r0, lr
; CHECK-NEXT: lsrl r0, r7, #28
; CHECK-NEXT: str.w r0, [r4, #66]
-; CHECK-NEXT: ldr.w lr, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: ldr.w r8, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: lsrl r0, r3, #28
; CHECK-NEXT: str.w r0, [r4, #41]
-; CHECK-NEXT: ldrd r0, r1, [sp, #40] @ 8-byte Folded Reload
+; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: lsrl r0, r1, #28
-; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT: and r1, r11, #15
; CHECK-NEXT: str r0, [r4, #16]
-; CHECK-NEXT: orr.w r0, r9, r10, lsl #4
-; CHECK-NEXT: lsrl r10, r1, #28
+; CHECK-NEXT: orr.w r0, r9, r6, lsl #4
; CHECK-NEXT: str.w r0, [r4, #95]
-; CHECK-NEXT: strb.w r10, [r4, #99]
-; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: orr.w r0, r7, r10, lsl #4
+; CHECK-NEXT: str.w r0, [r4, #70]
+; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: orr.w r0, r3, r7, lsl #4
+; CHECK-NEXT: str.w r0, [r4, #45]
+; CHECK-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: orr.w r0, r1, r2, lsl #4
+; CHECK-NEXT: and r1, r5, #15
+; CHECK-NEXT: lsrl r6, r1, #28
+; CHECK-NEXT: str r0, [r4, #20]
+; CHECK-NEXT: strb.w r6, [r4, #99]
+; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r6, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
+; CHECK-NEXT: and r1, r11, #15
+; CHECK-NEXT: lsrl r10, r1, #28
; CHECK-NEXT: str.w r0, [r4, #87]
-; CHECK-NEXT: orr.w r0, r7, r8, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #70]
-; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT: and r1, r0, #15
-; CHECK-NEXT: lsrl r8, r1, #28
-; CHECK-NEXT: strb.w r8, [r4, #74]
-; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: strb.w r10, [r4, #74]
+; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r5, lsl #4
+; CHECK-NEXT: orr.w r0, r0, lr, lsl #4
; CHECK-NEXT: str.w r0, [r4, #62]
-; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: orr.w r0, r3, r2, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #45]
-; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
-; CHECK-NEXT: lsrl r2, r1, #28
-; CHECK-NEXT: strb.w r2, [r4, #49]
-; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: strb.w r0, [r4, #49]
+; CHECK-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, lr, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r8, lsl #4
; CHECK-NEXT: str.w r0, [r4, #37]
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: orr.w r0, r0, r2, lsl #4
-; CHECK-NEXT: str r0, [r4, #20]
-; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
-; CHECK-NEXT: lsrl r2, r1, #28
-; CHECK-NEXT: strb r2, [r4, #24]
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: strb r0, [r4, #24]
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs.w r12, #0
-; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
; CHECK-NEXT: and r0, r12, #15
; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
; CHECK-NEXT: str r0, [r4, #12]
-; CHECK-NEXT: add sp, #48
+; CHECK-NEXT: add sp, #56
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -5063,244 +5048,308 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
; CHECK-LABEL: test_signed_v8f16_v8i128:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: .pad #72
+; CHECK-NEXT: sub sp, #72
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vcvtt.f32.f16 s30, s19
-; CHECK-NEXT: vcvtb.f32.f16 s20, s16
-; CHECK-NEXT: vmov r0, s30
-; CHECK-NEXT: vcvtb.f32.f16 s26, s19
-; CHECK-NEXT: vldr s22, .LCPI51_0
-; CHECK-NEXT: vmov r5, s20
-; CHECK-NEXT: vmov r7, s26
-; CHECK-NEXT: vcvtt.f32.f16 s28, s18
+; CHECK-NEXT: vcvtt.f32.f16 s24, s16
+; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vldr s24, .LCPI51_1
-; CHECK-NEXT: add.w r12, r4, #112
-; CHECK-NEXT: vmov r6, s28
-; CHECK-NEXT: vcvtb.f32.f16 s18, s18
-; CHECK-NEXT: vcmp.f32 s30, s24
+; CHECK-NEXT: vldr s22, .LCPI51_0
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: vldr s20, .LCPI51_1
+; CHECK-NEXT: vcvtb.f32.f16 s26, s17
+; CHECK-NEXT: vcmp.f32 s24, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
-; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s30, s22
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: vmov r0, s26
+; CHECK-NEXT: it gt
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s30, s30
+; CHECK-NEXT: vcmp.f32 s24, s24
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: csinv r7, r7, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: str r3, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: str r7, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: str r2, [sp, #52] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s26, s24
-; CHECK-NEXT: add.w r12, r4, #96
+; CHECK-NEXT: vcmp.f32 s26, s22
+; CHECK-NEXT: vcvtt.f32.f16 s24, s17
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: itttt lt
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt.w r3, #-2147483648
-; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s26, s22
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s26, s26
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: it gt
; CHECK-NEXT: mvngt r3, #-2147483648
+; CHECK-NEXT: vcmp.f32 s26, s26
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: str r3, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: vmov r7, s18
-; CHECK-NEXT: vcvtt.f32.f16 s26, s17
+; CHECK-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: vmov r0, s24
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s28, s24
-; CHECK-NEXT: add.w r12, r4, #80
+; CHECK-NEXT: vcmp.f32 s24, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt.w r3, #-2147483648
-; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s28, s22
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s28, s28
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: vcmp.f32 s24, s24
+; CHECK-NEXT: it gt
; CHECK-NEXT: mvngt r3, #-2147483648
+; CHECK-NEXT: vcvtb.f32.f16 s24, s18
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: str r3, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: vmov r6, s26
-; CHECK-NEXT: vcvtb.f32.f16 s28, s17
+; CHECK-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT: vmov r0, s24
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s18, s24
-; CHECK-NEXT: add.w r12, r4, #64
+; CHECK-NEXT: vcmp.f32 s24, s22
+; CHECK-NEXT: mov r11, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r3, #-2147483648
-; CHECK-NEXT: movlt r2, #0
-; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: vcvtt.f32.f16 s16, s16
+; CHECK-NEXT: movlt.w r11, #-2147483648
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r11, #-2147483648
+; CHECK-NEXT: vcmp.f32 s24, s24
+; CHECK-NEXT: vcvtt.f32.f16 s18, s18
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itt vs
+; CHECK-NEXT: movvs.w r11, #0
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: vmov r7, s28
+; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s26, s24
-; CHECK-NEXT: add.w r12, r4, #48
+; CHECK-NEXT: vcmp.f32 s18, s22
+; CHECK-NEXT: mov r6, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r3, #-2147483648
-; CHECK-NEXT: movlt r2, #0
-; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s26, s22
+; CHECK-NEXT: movlt.w r6, #-2147483648
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s26, s26
+; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r6, #-2147483648
+; CHECK-NEXT: vcvtb.f32.f16 s18, s19
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itt vs
+; CHECK-NEXT: movvs r6, #0
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: vmov r6, s16
+; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s28, s24
-; CHECK-NEXT: add.w r12, r4, #32
+; CHECK-NEXT: vcmp.f32 s18, s22
+; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r3, #-2147483648
-; CHECK-NEXT: movlt r2, #0
-; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s28, s22
+; CHECK-NEXT: movlt.w r7, #-2147483648
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s28, s28
+; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: vcvtt.f32.f16 s18, s19
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r7, #-2147483648
+; CHECK-NEXT: csinv r10, r0, zr, gt
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: csinv r8, r2, zr, gt
+; CHECK-NEXT: csinv r9, r1, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt vs
-; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: movvs.w r10, #0
+; CHECK-NEXT: movvs.w r9, #0
+; CHECK-NEXT: movvs.w r8, #0
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s16, s24
-; CHECK-NEXT: add.w r12, r4, #16
+; CHECK-NEXT: vcmp.f32 s18, s22
+; CHECK-NEXT: mov r5, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s16, s22
+; CHECK-NEXT: movlt.w r5, #-2147483648
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s16, s16
+; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: add.w r3, r4, #112
+; CHECK-NEXT: it gt
+; CHECK-NEXT: mvngt r5, #-2147483648
+; CHECK-NEXT: vcvtb.f32.f16 s16, s16
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt vs
-; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: stm r3!, {r0, r1, r2}
+; CHECK-NEXT: strd r10, r9, [r4, #96]
+; CHECK-NEXT: str.w r8, [r4, #104]
+; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #80]
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #84]
+; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #88]
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #64]
+; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #68]
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #72]
+; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #48]
+; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #52]
+; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #56]
+; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #32]
+; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #36]
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #40]
+; CHECK-NEXT: ldr r0, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #16]
+; CHECK-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #20]
+; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #24]
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s20, s24
+; CHECK-NEXT: vcmp.f32 s16, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s20, s22
+; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: vcmp.f32 s16, s16
+; CHECK-NEXT: it gt
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s20, s20
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt vs
-; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: str r2, [r4, #8]
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: str r1, [r4, #4]
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm r4!, {r0, r1, r2, r3}
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: str r0, [r4]
+; CHECK-NEXT: str r5, [r4, #124]
+; CHECK-NEXT: str r7, [r4, #108]
+; CHECK-NEXT: str r6, [r4, #92]
+; CHECK-NEXT: str.w r11, [r4, #76]
+; CHECK-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #60]
+; CHECK-NEXT: ldr r0, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #44]
+; CHECK-NEXT: ldr r0, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r4, #28]
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: str r3, [r4, #12]
+; CHECK-NEXT: add sp, #72
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI51_0:
-; CHECK-NEXT: .long 0x7effffff @ float 1.70141173E+38
-; CHECK-NEXT: .LCPI51_1:
; CHECK-NEXT: .long 0xff000000 @ float -1.70141183E+38
+; CHECK-NEXT: .LCPI51_1:
+; CHECK-NEXT: .long 0x7effffff @ float 1.70141173E+38
%x = call <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half> %f)
ret <8 x i128> %x
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index ee040feca4240..bd109d0e92457 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -35,31 +35,31 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f32_v2i32(<2 x float> %f) {
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vldr s18, .LCPI1_0
; CHECK-NEXT: vcmp.f32 s17, #0
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s18
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r4, #0
-; CHECK-NEXT: vcmp.f32 s17, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
+; CHECK-NEXT: vmov r1, s16
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r4, #0
-; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: csinv r5, r0, zr, gt
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s18
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s16, s18
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r1, #0
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
; CHECK-NEXT: vmov q0[3], q0[1], r1, r4
; CHECK-NEXT: vpop {d8, d9}
@@ -288,9 +288,7 @@ define arm_aapcs_vfpcc <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) {
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: csinv r0, r6, zr, ne
; CHECK-NEXT: pop {r4, r5, r6, pc}
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
@@ -340,13 +338,13 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) {
; CHECK-NEXT: mov r2, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r8, #0
-; CHECK-NEXT: movne.w r9, #-1
; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: csinv r9, r9, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r5
@@ -354,18 +352,18 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) {
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r7, r1, r6, ne
-; CHECK-NEXT: csel r6, r0, r6, ne
+; CHECK-NEXT: csel r7, r0, r6, ne
+; CHECK-NEXT: csel r6, r1, r6, ne
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: mov r2, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: csinv r0, r7, zr, ne
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r9
+; CHECK-NEXT: vmov q0[3], q0[1], r6, r8
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -397,75 +395,74 @@ define arm_aapcs_vfpcc <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) {
; CHECK-NEXT: vmov.f32 s19, s1
; CHECK-NEXT: vldr d0, .LCPI10_0
; CHECK-NEXT: vmov r8, r9, d1
-; CHECK-NEXT: vmov r5, r4, d0
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: vmov.f32 s16, s4
; CHECK-NEXT: vmov.f32 s17, s5
-; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r4
-; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r9
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vldr d0, .LCPI10_1
-; CHECK-NEXT: vmov r11, r1, d9
; CHECK-NEXT: cmp.w r10, #0
-; CHECK-NEXT: vmov r7, r6, d8
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: csel r10, r0, r10, ne
+; CHECK-NEXT: csel r4, r0, r10, ne
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: mov r9, r2
-; CHECK-NEXT: mov r8, r3
+; CHECK-NEXT: vmov r7, r6, d8
+; CHECK-NEXT: vmov r5, r11, d9
+; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r10, r3
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr.w r9, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldr.w r8, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: csinv r0, r4, zr, ne
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r4
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r10, #-1
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: csel r5, r0, r4, ne
+; CHECK-NEXT: ldr r6, [sp] @ 4-byte Reload
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: csel r4, r0, r4, ne
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r7, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r5, #-1
-; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: csinv r10, r4, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: vmov.32 q0[1], r10
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r4, r5
+; CHECK-NEXT: csinv r0, r4, zr, ne
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r10
; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -491,109 +488,105 @@ define arm_aapcs_vfpcc <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
-; CHECK-NEXT: .pad #24
-; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: .pad #32
+; CHECK-NEXT: sub sp, #32
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI11_0
+; CHECK-NEXT: vmov r7, r6, d9
; CHECK-NEXT: vmov q5, q1
-; CHECK-NEXT: vmov r8, r9, d10
-; CHECK-NEXT: vmov r2, r11, d0
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: vmov r11, r4, d0
+; CHECK-NEXT: str r7, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: str r6, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: vmov r5, r0, d11
; CHECK-NEXT: vldr d0, .LCPI11_1
-; CHECK-NEXT: vmov r5, r1, d11
+; CHECK-NEXT: vmov r9, r8, d10
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: vmov r1, r0, d0
+; CHECK-NEXT: strd r1, r0, [sp, #4] @ 8-byte Folded Spill
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: cmp.w r10, #0
-; CHECK-NEXT: vmov r6, r7, d8
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: csel r4, r0, r10, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: strd r5, r1, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r8, r2
-; CHECK-NEXT: mov r9, r3
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: csel r0, r0, r10, ne
+; CHECK-NEXT: ldr.w r10, [sp] @ 4-byte Reload
+; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: csel r0, r0, r6, ne
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: csel r6, r0, r6, ne
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: ldr.w r9, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: mov r5, r7
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: csinv r8, r6, zr, ne
+; CHECK-NEXT: vmov r6, r7, d8
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: mov r10, r11
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: csel r4, r0, r4, ne
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r8, r9
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldr.w r11, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: ldr.w r9, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: ldr.w r9, [sp] @ 4-byte Reload
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r5, r0, r4, ne
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: vmov r7, r6, d9
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: csinv r4, r4, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r5, #-1
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: vmov q4[2], q4[0], r4, r8
; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: csinv r4, r0, zr, ne
+; CHECK-NEXT: ldrd r0, r1, [sp, #20] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldrd r1, r0, [sp, #12] @ 8-byte Folded Reload
-; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
-; CHECK-NEXT: vmov q0[3], q0[1], r4, r5
-; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: csinv r0, r0, zr, ne
+; CHECK-NEXT: vmov q4[3], q4[1], r0, r4
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -620,52 +613,85 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: .pad #32
; CHECK-NEXT: sub sp, #32
-; CHECK-NEXT: vmov.f32 s16, s0
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: vmov.f32 s17, s1
+; CHECK-NEXT: vmov.f32 s18, s0
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: vmov.f32 s19, s1
; CHECK-NEXT: vldr d0, .LCPI12_0
-; CHECK-NEXT: vmov r6, r11, d4
+; CHECK-NEXT: vmov r6, r4, d4
; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: vmov.f32 s18, s6
-; CHECK-NEXT: vmov.f32 s20, s4
-; CHECK-NEXT: vmov.f32 s22, s2
-; CHECK-NEXT: vmov.f32 s19, s7
-; CHECK-NEXT: vmov.f32 s21, s5
-; CHECK-NEXT: vmov.f32 s23, s3
+; CHECK-NEXT: vmov.f32 s20, s6
+; CHECK-NEXT: vmov.f32 s22, s4
+; CHECK-NEXT: vmov.f32 s16, s2
+; CHECK-NEXT: vmov.f32 s21, s7
+; CHECK-NEXT: vmov.f32 s23, s5
+; CHECK-NEXT: vmov.f32 s17, s3
; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: vmov r8, r1, d11
; CHECK-NEXT: vldr d0, .LCPI12_1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: vmov r10, r9, d9
+; CHECK-NEXT: vmov r5, r10, d10
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: csel r4, r0, r4, ne
+; CHECK-NEXT: csel r7, r0, r7, ne
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: vmov r5, r1, d10
-; CHECK-NEXT: strd r5, r1, [sp, #12] @ 8-byte Folded Spill
+; CHECK-NEXT: vmov r8, r11, d11
+; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: csinv r0, r7, zr, ne
+; CHECK-NEXT: mov r7, r10
+; CHECK-NEXT: str.w r0, [r9, #16]
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: str.w r10, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r10, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r11, r2
-; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: csel r0, r0, r4, ne
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r5, r11
+; CHECK-NEXT: mov r11, r10
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r4, r0, r4, ne
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: vmov r10, r9, d9
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r7, #16]
; CHECK-NEXT: mov r0, r10
-; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: csinv r8, r4, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r10
@@ -675,82 +701,43 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
; CHECK-NEXT: mov r1, r9
; CHECK-NEXT: csel r4, r0, r4, ne
; CHECK-NEXT: mov r0, r10
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: mov r10, r11
-; CHECK-NEXT: mov r11, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: ldrd r0, r1, [sp, #12] @ 8-byte Folded Reload
; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r9, r7
+; CHECK-NEXT: csinv r6, r4, zr, ne
+; CHECK-NEXT: mov r7, r5
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vmov r4, r5, d8
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: vmov q4[2], q4[0], r6, r8
+; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: csinv r8, r0, zr, ne
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r9, r0, r4, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: mov r8, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #-1
-; CHECK-NEXT: ldr.w r11, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: ldr r5, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r7, r0, r4, ne
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: vmov r4, r5, d8
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r7, #-1
-; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: csel r6, r0, r6, ne
; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r7
-; CHECK-NEXT: vmov q0[3], q0[1], r9, r0
+; CHECK-NEXT: csinv r0, r6, zr, ne
+; CHECK-NEXT: vmov q4[3], q4[1], r0, r8
; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: vstrw.32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q4, [r0]
; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, #4
@@ -778,141 +765,140 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12}
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vmov.f32 s18, s0
; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT: vmov.f32 s17, s1
+; CHECK-NEXT: vmov.f32 s19, s1
; CHECK-NEXT: vldr d0, .LCPI13_0
; CHECK-NEXT: vmov r5, r6, d5
-; CHECK-NEXT: vmov r10, r3, d0
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: vmov.f32 s20, s8
; CHECK-NEXT: vmov.f32 s22, s6
-; CHECK-NEXT: vmov.f32 s18, s4
-; CHECK-NEXT: vmov.f32 s24, s2
+; CHECK-NEXT: vmov.f32 s24, s4
+; CHECK-NEXT: vmov.f32 s16, s2
; CHECK-NEXT: vmov.f32 s21, s9
; CHECK-NEXT: vmov.f32 s23, s7
-; CHECK-NEXT: vmov.f32 s19, s5
-; CHECK-NEXT: vmov.f32 s25, s3
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov.f32 s25, s5
+; CHECK-NEXT: vmov.f32 s17, s3
+; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: vmov r9, r1, d11
+; CHECK-NEXT: vmov r2, r1, d11
; CHECK-NEXT: vldr d0, .LCPI13_1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: vmov r8, r11, d10
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: csel r4, r0, r4, ne
+; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: vmov r10, r9, d10
+; CHECK-NEXT: vmov r11, r4, d0
+; CHECK-NEXT: csel r7, r0, r8, ne
; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: vmov r7, r1, d12
-; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: mov r5, r2
-; CHECK-NEXT: strd r7, r1, [sp, #24] @ 8-byte Folded Spill
+; CHECK-NEXT: strd r2, r1, [sp, #28] @ 8-byte Folded Spill
+; CHECK-NEXT: vmov r2, r1, d12
+; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: strd r2, r1, [sp, #12] @ 8-byte Folded Spill
; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r6, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldr r7, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: str r4, [r7, #20]
-; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: csinv r0, r7, zr, ne
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: str r0, [r6, #20]
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: vmov r2, r1, d9
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r8, r5
-; CHECK-NEXT: strd r2, r1, [sp, #16] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: ldr.w r11, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: csel r5, r0, r5, ne
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r7, #16]
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: csinv r0, r5, zr, ne
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: str r0, [r6, #16]
+; CHECK-NEXT: mov r9, r7
+; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: ldr.w r10, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: csel r0, r0, r5, ne
+; CHECK-NEXT: ldr r5, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: str.w r8, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: str r4, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r7, r5
; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: ldr.w r8, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r4, r0, r4, ne
+; CHECK-NEXT: mov r0, r10
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: csel r9, r0, r4, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: vmov r6, r5, d9
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr.w r10, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #-1
-; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: csinv r0, r4, zr, ne
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r7, r9
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r8, r0, r4, ne
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: csel r4, r0, r4, ne
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldrd r0, r1, [sp, #28] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: csinv r6, r4, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vmov r4, r5, d8
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r8, #-1
+; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: vmov q4[2], q4[0], r6, r0
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: csinv r9, r0, zr, ne
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_dcmpge
@@ -924,17 +910,14 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: csel r6, r0, r6, ne
; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r8
-; CHECK-NEXT: vmov q0[3], q0[1], r9, r0
+; CHECK-NEXT: csinv r0, r6, zr, ne
+; CHECK-NEXT: vmov q4[3], q4[1], r0, r9
; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: vstrw.32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q4, [r0]
; CHECK-NEXT: add sp, #40
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12}
; CHECK-NEXT: add sp, #4
@@ -985,32 +968,32 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) {
; CHECK-NEXT: vcvtt.f32.f16 s18, s16
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: vcvtb.f32.f16 s16, s16
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vldr s20, .LCPI15_0
; CHECK-NEXT: vcmp.f32 s18, #0
-; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vcvtb.f32.f16 s16, s16
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r4, #0
-; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
+; CHECK-NEXT: vmov r1, s16
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r4, #0
-; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: csinv r5, r0, zr, gt
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s16, s20
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r1, #0
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
; CHECK-NEXT: vmov q0[3], q0[1], r1, r4
; CHECK-NEXT: vpop {d8, d9, d10}
@@ -1425,91 +1408,87 @@ define arm_aapcs_vfpcc <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
; CHECK-NEXT: .vsave {d8, d9, d10}
; CHECK-NEXT: vpush {d8, d9, d10}
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: vcmp.f32 s16, #0
-; CHECK-NEXT: mov r9, r1
+; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vmov r7, s18
+; CHECK-NEXT: vmov r8, s18
; CHECK-NEXT: vldr s20, .LCPI28_0
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r9, #0
; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt.w r9, #0
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vcmp.f32 s19, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: movlt r4, #0
+; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: vcmp.f32 s18, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s19, s20
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r7, #-1
; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #3
+; CHECK-NEXT: bfc r1, #18, #14
+; CHECK-NEXT: csinv r2, r0, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r4, #-1
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r10, #65535
-; CHECK-NEXT: movtgt r10, #3
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: bfc r1, #18, #14
+; CHECK-NEXT: movwgt r6, #65535
+; CHECK-NEXT: movtgt r6, #3
+; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mov r8, r2
; CHECK-NEXT: bfc r3, #18, #14
-; CHECK-NEXT: mov r6, r7
-; CHECK-NEXT: vcmp.f32 s16, s20
-; CHECK-NEXT: lsll r4, r3, #22
-; CHECK-NEXT: lsrl r6, r1, #28
+; CHECK-NEXT: lsrl r8, r1, #28
+; CHECK-NEXT: csinv r4, r7, zr, gt
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: str.w r5, [r8]
+; CHECK-NEXT: lsll r4, r3, #22
+; CHECK-NEXT: mov r7, r2
+; CHECK-NEXT: orr.w r1, r1, r3
+; CHECK-NEXT: csinv r0, r9, zr, gt
+; CHECK-NEXT: str.w r0, [r10]
+; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r9, #65535
-; CHECK-NEXT: movtgt r9, #3
-; CHECK-NEXT: orrs r1, r3
-; CHECK-NEXT: str.w r1, [r8, #20]
+; CHECK-NEXT: movwgt r5, #65535
+; CHECK-NEXT: movtgt r5, #3
+; CHECK-NEXT: str.w r1, [r10, #20]
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s17, #0
-; CHECK-NEXT: orr.w r2, r6, r4
+; CHECK-NEXT: orr.w r2, r8, r4
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
-; CHECK-NEXT: bfc r9, #18, #14
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #3
-; CHECK-NEXT: str.w r2, [r8, #16]
-; CHECK-NEXT: lsr.w r2, r10, #10
-; CHECK-NEXT: strb.w r2, [r8, #24]
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: str.w r2, [r10, #16]
+; CHECK-NEXT: lsr.w r2, r6, #10
; CHECK-NEXT: bfc r1, #18, #14
+; CHECK-NEXT: strb.w r2, [r10, #24]
+; CHECK-NEXT: bfc r5, #18, #14
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: orr.w r0, r5, r0, lsl #18
; CHECK-NEXT: lsrl r2, r1, #14
-; CHECK-NEXT: orr.w r0, r9, r0, lsl #18
; CHECK-NEXT: orr.w r1, r1, r7, lsl #4
-; CHECK-NEXT: strd r2, r1, [r8, #8]
-; CHECK-NEXT: str.w r0, [r8, #4]
+; CHECK-NEXT: strd r2, r1, [r10, #8]
+; CHECK-NEXT: str.w r0, [r10, #4]
; CHECK-NEXT: vpop {d8, d9, d10}
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
; CHECK-NEXT: .p2align 2
@@ -1523,77 +1502,69 @@ define arm_aapcs_vfpcc <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
define arm_aapcs_vfpcc <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i64:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10}
-; CHECK-NEXT: vpush {d8, d9, d10}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12}
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r11, r0
-; CHECK-NEXT: vmov r0, s18
-; CHECK-NEXT: vldr s20, .LCPI29_0
-; CHECK-NEXT: vcmp.f32 s19, #0
-; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: vldr s24, .LCPI29_0
+; CHECK-NEXT: vcmp.f32 s17, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vcmp.f32 s17, s24
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r11, #0
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: vcmp.f32 s19, s20
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vmov r9, s17
-; CHECK-NEXT: vmov r8, s16
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movgt.w r11, #-1
+; CHECK-NEXT: vmov r5, s19
+; CHECK-NEXT: vmov r8, s18
+; CHECK-NEXT: csinv r7, r0, zr, gt
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: csinv r9, r1, zr, gt
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: vcmp.f32 s18, #0
-; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: vcmp.f32 s16, s24
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: movgt.w r6, #-1
+; CHECK-NEXT: csinv r6, r0, zr, gt
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: csinv r4, r1, zr, gt
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vcmp.f32 s17, #0
-; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vcmp.f32 s19, #0
+; CHECK-NEXT: vmov q5[2], q5[0], r6, r7
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: vcmp.f32 s19, s24
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: movlt r4, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r4, #-1
-; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: vmov q5[3], q5[1], r4, r9
+; CHECK-NEXT: csinv r5, r0, zr, gt
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: csinv r4, r1, zr, gt
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: vcmp.f32 s16, #0
-; CHECK-NEXT: vmov q1[2], q1[0], r7, r11
+; CHECK-NEXT: vcmp.f32 s18, #0
+; CHECK-NEXT: vmov q0, q5
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s24
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s16, s20
-; CHECK-NEXT: vmov q1[3], q1[1], r6, r10
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r4
-; CHECK-NEXT: vpop {d8, d9, d10}
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: vmov q1[2], q1[0], r0, r5
+; CHECK-NEXT: csinv r0, r1, zr, gt
+; CHECK-NEXT: vmov q1[3], q1[1], r0, r4
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12}
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI29_0:
@@ -1611,124 +1582,116 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10}
; CHECK-NEXT: vpush {d8, d9, d10}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vmov r6, s17
; CHECK-NEXT: vldr s20, .LCPI30_0
-; CHECK-NEXT: vcmp.f32 s18, #0
+; CHECK-NEXT: vcmp.f32 s17, #0
+; CHECK-NEXT: mov r9, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt r3, #0
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
-; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: movlt.w r9, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: str.w r2, [r4, #33]
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: str.w r1, [r4, #29]
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: str.w r0, [r4, #25]
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: vmov r7, s19
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: movgt.w r9, #15
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: csinv r5, r1, zr, gt
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: vcmp.f32 s17, #0
-; CHECK-NEXT: mov r5, r1
-; CHECK-NEXT: mov r6, r2
+; CHECK-NEXT: vcmp.f32 s18, #0
+; CHECK-NEXT: mov r6, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: movgt.w r10, #-1
+; CHECK-NEXT: movgt r6, #15
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r11, r2, zr, gt
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r1, zr, gt
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vcmp.f32 s19, #0
-; CHECK-NEXT: mov r9, r1
-; CHECK-NEXT: mov r8, r2
-; CHECK-NEXT: mov r11, r3
+; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s19, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r9, #0
; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: movlt.w r11, #0
-; CHECK-NEXT: vcmp.f32 s19, s20
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r11, #15
-; CHECK-NEXT: movgt.w r8, #-1
-; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: movgt.w r9, #-1
+; CHECK-NEXT: csinv r8, r0, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: csinv r10, r2, zr, gt
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: orr.w r1, r1, r10, lsl #4
+; CHECK-NEXT: str.w r1, [r4, #45]
+; CHECK-NEXT: str.w r0, [r4, #41]
+; CHECK-NEXT: str.w r11, [r4, #33]
+; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT: str.w r0, [r4, #29]
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: str.w r0, [r4, #25]
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r11, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: lsrl r0, r5, #28
+; CHECK-NEXT: orr.w r1, r5, r11, lsl #4
+; CHECK-NEXT: strd r0, r1, [r4, #16]
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r7, #15
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r3, #0
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
-; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: str r2, [r4, #8]
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: str r1, [r4, #4]
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: and r1, r7, #15
; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: lsrl r0, r9, #28
-; CHECK-NEXT: and r1, r11, #15
-; CHECK-NEXT: str.w r0, [r4, #41]
-; CHECK-NEXT: mov r0, r10
-; CHECK-NEXT: lsrl r0, r5, #28
-; CHECK-NEXT: str r0, [r4, #16]
-; CHECK-NEXT: orr.w r0, r9, r8, lsl #4
-; CHECK-NEXT: lsrl r8, r1, #28
-; CHECK-NEXT: str.w r0, [r4, #45]
-; CHECK-NEXT: strb.w r8, [r4, #49]
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r7, lsl #4
+; CHECK-NEXT: and r0, r6, #15
+; CHECK-NEXT: lsrl r10, r1, #28
+; CHECK-NEXT: orr.w r0, r0, r8, lsl #4
+; CHECK-NEXT: strb.w r10, [r4, #49]
+; CHECK-NEXT: and r1, r9, #15
; CHECK-NEXT: str.w r0, [r4, #37]
-; CHECK-NEXT: orr.w r0, r5, r6, lsl #4
-; CHECK-NEXT: str r0, [r4, #20]
-; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT: and r1, r0, #15
-; CHECK-NEXT: lsrl r6, r1, #28
-; CHECK-NEXT: strb r6, [r4, #24]
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: strb r0, [r4, #24]
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #15
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: and r0, r3, #15
-; CHECK-NEXT: orr.w r0, r0, r10, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
; CHECK-NEXT: str r0, [r4, #12]
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9, d10}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -1753,10 +1716,8 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vmov r5, s18
; CHECK-NEXT: vldr s20, .LCPI31_0
; CHECK-NEXT: vcmp.f32 s19, #0
-; CHECK-NEXT: add.w r12, r4, #48
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
@@ -1764,68 +1725,65 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
; CHECK-NEXT: vcmp.f32 s19, s20
+; CHECK-NEXT: add.w r12, r4, #48
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
+; CHECK-NEXT: vmov r5, s18
; CHECK-NEXT: vmov r7, s16
; CHECK-NEXT: vmov r6, s17
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s18, #0
; CHECK-NEXT: add.w r12, r4, #32
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s17, #0
; CHECK-NEXT: add.w r12, r4, #16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: stm r4!, {r0, r1, r2, r3}
; CHECK-NEXT: vpop {d8, d9, d10}
; CHECK-NEXT: add sp, #4
@@ -1888,16 +1846,15 @@ define arm_aapcs_vfpcc <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
; CHECK-NEXT: mov r8, r3
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #1
-; CHECK-NEXT: and r0, r7, #1
; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
-; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: csinc r0, r7, zr, ne
; CHECK-NEXT: movs r7, #0
-; CHECK-NEXT: bfi r7, r0, #0, #1
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: bfi r7, r0, #0, #1
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r6
@@ -1911,9 +1868,8 @@ define arm_aapcs_vfpcc <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #1
-; CHECK-NEXT: and r0, r4, #1
+; CHECK-NEXT: csinc r0, r4, zr, ne
+; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r7, r0, #1, #1
; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
@@ -2288,13 +2244,13 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x doubl
; CHECK-NEXT: mov r2, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r8, #0
-; CHECK-NEXT: movne.w r9, #-1
; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: csinv r9, r9, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r5
@@ -2302,18 +2258,18 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x doubl
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r7, r1, r6, ne
-; CHECK-NEXT: csel r6, r0, r6, ne
+; CHECK-NEXT: csel r7, r0, r6, ne
+; CHECK-NEXT: csel r6, r1, r6, ne
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: mov r2, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: csinv r0, r7, zr, ne
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r9
+; CHECK-NEXT: vmov q0[3], q0[1], r6, r8
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -2343,23 +2299,23 @@ define arm_aapcs_vfpcc <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) {
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI38_0
-; CHECK-NEXT: vmov r11, r5, d8
+; CHECK-NEXT: vmov r11, r10, d8
; CHECK-NEXT: vmov r6, r7, d0
-; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vldr d0, .LCPI38_1
; CHECK-NEXT: vmov r5, r8, d9
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: vmov r3, r2, d0
-; CHECK-NEXT: csel r10, r0, r4, ne
+; CHECK-NEXT: csel r0, r0, r4, ne
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: csel r9, r1, r4, ne
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r8
@@ -2382,21 +2338,22 @@ define arm_aapcs_vfpcc <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) {
; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: movwne r6, #65535
-; CHECK-NEXT: movtne r6, #3
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: csinv r4, r4, zr, ne
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movwne r6, #65535
+; CHECK-NEXT: movtne r6, #3
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ittt ne
+; CHECK-NEXT: itt ne
; CHECK-NEXT: movwne r9, #65535
; CHECK-NEXT: movtne r9, #3
-; CHECK-NEXT: movne.w r10, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r10, r4
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: csinv r0, r0, zr, ne
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
; CHECK-NEXT: vmov q0[3], q0[1], r9, r6
; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
@@ -2423,60 +2380,64 @@ define arm_aapcs_vfpcc <2 x i64> @test_unsigned_v2f64_v2i64(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI39_0
-; CHECK-NEXT: vmov r6, r7, d9
-; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: vmov r11, r10, d8
+; CHECK-NEXT: vmov r6, r7, d0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vldr d0, .LCPI39_1
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: csel r9, r0, r8, ne
-; CHECK-NEXT: csel r8, r1, r8, ne
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: vmov r5, r4, d8
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r8, #-1
-; CHECK-NEXT: movne.w r9, #-1
-; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vmov r5, r8, d9
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: vmov r9, r2, d0
+; CHECK-NEXT: csel r1, r1, r4, ne
+; CHECK-NEXT: csel r0, r0, r4, ne
+; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r7, r1, r6, ne
-; CHECK-NEXT: csel r6, r0, r6, ne
+; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r6, r0, r4, ne
+; CHECK-NEXT: csel r4, r1, r4, ne
; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: movne.w r7, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: csinv r4, r4, zr, ne
+; CHECK-NEXT: csinv r5, r6, zr, ne
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: csinv r0, r0, zr, ne
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: csinv r0, r0, zr, ne
+; CHECK-NEXT: vmov q0[3], q0[1], r0, r4
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2501,191 +2462,188 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #48
-; CHECK-NEXT: sub sp, #48
+; CHECK-NEXT: .pad #56
+; CHECK-NEXT: sub sp, #56
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI40_0
-; CHECK-NEXT: vmov r11, r4, d8
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r9, r2
+; CHECK-NEXT: vmov r4, r5, d9
+; CHECK-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT: vmov r8, r9, d0
+; CHECK-NEXT: str.w r8, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r5, r4
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: vldr d0, .LCPI40_1
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT: csel r4, r2, r8, ne
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: strd r2, r3, [sp, #28] @ 8-byte Folded Spill
+; CHECK-NEXT: csel r10, r1, r7, ne
+; CHECK-NEXT: vmov r6, r3, d0
+; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: str r5, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r8, r3
+; CHECK-NEXT: str r6, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r6, #8]
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: ldr r7, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: csinv r11, r10, zr, ne
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: csel r8, r1, r0, ne
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: str.w r10, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: mov r10, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r7, [sp, #48] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r6, #4]
-; CHECK-NEXT: mov r5, r6
-; CHECK-NEXT: str r6, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: ldr r6, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: str.w r11, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: csinv r8, r8, zr, ne
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: str.w r8, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: lsrl r8, r11, #28
+; CHECK-NEXT: mov r6, r9
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r6, r8
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: csel r9, r1, r0, ne
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: ldr r5, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: vmov r8, r11, d9
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r5]
-; CHECK-NEXT: mov r10, r9
-; CHECK-NEXT: str.w r9, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: mov r5, r7
+; CHECK-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT: csinv r0, r9, zr, ne
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: orr.w r0, r11, r0, lsl #4
+; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mov r11, r7
+; CHECK-NEXT: strd r8, r0, [r1, #16]
+; CHECK-NEXT: vmov r8, r4, d8
+; CHECK-NEXT: str r6, [sp, #44] @ 4-byte Spill
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: ldr r4, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: cmp.w r9, #0
-; CHECK-NEXT: strd r3, r0, [sp, #16] @ 8-byte Folded Spill
-; CHECK-NEXT: csel r7, r1, r9, ne
-; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: csel r7, r2, r7, ne
+; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: csinv r0, r7, zr, ne
+; CHECK-NEXT: ldr r7, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: str r0, [r7, #8]
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r7, #-1
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: csel r9, r1, r0, ne
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: csel r11, r1, r0, ne
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r9, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r0, #-1
-; CHECK-NEXT: ldr.w r9, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: lsrl r0, r7, #28
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: str.w r0, [r9, #16]
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: csinv r0, r11, zr, ne
+; CHECK-NEXT: mov r10, r7
+; CHECK-NEXT: str r0, [r7, #4]
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: ldr r6, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT: ldr r5, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #40] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: csel r10, r1, r0, ne
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: csel r11, r1, r0, ne
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r10, #-1
-; CHECK-NEXT: orr.w r0, r7, r10, lsl #4
-; CHECK-NEXT: str.w r0, [r9, #20]
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: csinv r0, r11, zr, ne
; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: str.w r0, [r10]
+; CHECK-NEXT: ldr.w r11, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r10, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r5, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #32] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: csel r7, r1, r0, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r8, r6
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: mov r5, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #15
-; CHECK-NEXT: and r1, r7, #15
-; CHECK-NEXT: lsrl r10, r1, #28
-; CHECK-NEXT: strb.w r10, [r9, #24]
-; CHECK-NEXT: ldr r6, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: movne r4, #15
+; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: and r1, r4, #15
+; CHECK-NEXT: ldr r7, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: strb r0, [r7, #24]
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: ldrd r3, r2, [sp, #44] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r7, r1, r0, ne
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #15
-; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: and r0, r7, #15
+; CHECK-NEXT: movne r4, #15
+; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: and r0, r4, #15
; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT: str.w r0, [r9, #12]
-; CHECK-NEXT: add sp, #48
+; CHECK-NEXT: str r0, [r7, #12]
+; CHECK-NEXT: add sp, #56
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2710,17 +2668,16 @@ define arm_aapcs_vfpcc <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #24
-; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: .pad #40
+; CHECK-NEXT: sub sp, #40
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI41_0
; CHECK-NEXT: vmov r8, r7, d9
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov r2, r9, d0
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r11, r2
+; CHECK-NEXT: strd r2, r3, [sp, #32] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r8
@@ -2728,156 +2685,152 @@ define arm_aapcs_vfpcc <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: vldr d0, .LCPI41_1
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT: strd r1, r0, [sp, #24] @ 8-byte Folded Spill
+; CHECK-NEXT: mov r10, r2
+; CHECK-NEXT: vmov r5, r11, d0
; CHECK-NEXT: csel r6, r3, r6, ne
-; CHECK-NEXT: vmov r10, r5, d0
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: str r5, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: str r6, [r4, #28]
-; CHECK-NEXT: str.w r11, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r6, zr, ne
+; CHECK-NEXT: str.w r0, [r9, #28]
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: ldr r6, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r6
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: csel r5, r1, r0, ne
-; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: csel r10, r10, r0, ne
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r5, #-1
-; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: str r5, [r4, #24]
-; CHECK-NEXT: mov r5, r4
-; CHECK-NEXT: mov r4, r9
+; CHECK-NEXT: csinv r0, r10, zr, ne
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: str.w r0, [r9, #24]
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mov r10, r9
+; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: csel r9, r1, r0, ne
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r6, r5
+; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: str.w r11, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vmov r1, r11, d8
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #-1
-; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: str.w r9, [r5, #20]
+; CHECK-NEXT: csinv r0, r9, zr, ne
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r4
-; CHECK-NEXT: vmov r6, r11, d8
-; CHECK-NEXT: mov r9, r4
+; CHECK-NEXT: str.w r0, [r10, #20]
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: ldr r5, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r9, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: csel r10, r1, r0, ne
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r5, #16]
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: csinv r0, r10, zr, ne
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: str r0, [r7, #16]
+; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: ldr.w r10, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT: strd r1, r0, [sp, #24] @ 8-byte Folded Spill
; CHECK-NEXT: csel r4, r3, r8, ne
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
+; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: mov r8, r7
+; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: csinv r0, r4, zr, ne
+; CHECK-NEXT: str r0, [r7, #12]
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: ldr r4, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: str r4, [r7, #12]
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: csel r8, r1, r0, ne
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: csinv r0, r8, zr, ne
; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r7, #8]
+; CHECK-NEXT: str r0, [r7, #8]
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: mov r8, r4
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: csel r4, r1, r0, ne
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: csinv r0, r4, zr, ne
; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r7, #4]
+; CHECK-NEXT: str r0, [r7, #4]
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: csel r4, r1, r0, ne
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r7]
-; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: csinv r0, r4, zr, ne
+; CHECK-NEXT: str r0, [r7]
+; CHECK-NEXT: add sp, #40
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -3335,7 +3288,6 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: vcvtb.f32.f16 s22, s18
-; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-NEXT: vmov r0, s22
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcvtt.f32.f16 s26, s17
@@ -3344,184 +3296,173 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
; CHECK-NEXT: vcvtt.f32.f16 s20, s18
; CHECK-NEXT: vcvtb.f32.f16 s24, s17
; CHECK-NEXT: vcmp.f32 s22, #0
+; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: vmov r8, s20
+; CHECK-NEXT: vmov r9, s20
; CHECK-NEXT: vldr s18, .LCPI48_0
-; CHECK-NEXT: vmov r9, s24
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: it lt
+; CHECK-NEXT: vmov r8, s24
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r4, #0
; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vcmp.f32 s26, #0
+; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s24, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s18
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s24, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: vcmp.f32 s26, s18
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: vcmp.f32 s22, s18
; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r5, zr, gt
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r5, #65535
-; CHECK-NEXT: movtgt r5, #3
-; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: movwgt r7, #65535
+; CHECK-NEXT: movtgt r7, #3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: str.w r6, [r10, #25]
+; CHECK-NEXT: str r7, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r6, zr, gt
+; CHECK-NEXT: str.w r0, [r10, #25]
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r4, #65535
; CHECK-NEXT: movtgt r4, #3
-; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s20, #0
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s20, s18
-; CHECK-NEXT: vcvtb.f32.f16 s20, s19
-; CHECK-NEXT: mov r5, r1
-; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r6, #-1
+; CHECK-NEXT: vcvtb.f32.f16 s20, s19
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r5, #65535
; CHECK-NEXT: movtgt r5, #3
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s20, #0
-; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s20, s18
-; CHECK-NEXT: vcvtt.f32.f16 s20, s19
-; CHECK-NEXT: mov r11, r1
-; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r11, #0
-; CHECK-NEXT: movlt.w r9, #0
+; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r9, #-1
-; CHECK-NEXT: movwgt r11, #65535
-; CHECK-NEXT: movtgt r11, #3
+; CHECK-NEXT: vcvtt.f32.f16 s20, s19
+; CHECK-NEXT: itt gt
+; CHECK-NEXT: movwgt r7, #65535
+; CHECK-NEXT: movtgt r7, #3
+; CHECK-NEXT: csinv r6, r0, zr, gt
+; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s20, #0
-; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r9, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s20, s18
-; CHECK-NEXT: vcvtb.f32.f16 s20, s16
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: movlt.w r10, #0
+; CHECK-NEXT: movlt.w r9, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movwgt r7, #65535
-; CHECK-NEXT: movtgt r7, #3
+; CHECK-NEXT: vcvtb.f32.f16 s20, s16
+; CHECK-NEXT: itt gt
+; CHECK-NEXT: movwgt r9, #65535
+; CHECK-NEXT: movtgt r9, #3
+; CHECK-NEXT: csinv r8, r0, zr, gt
+; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s20, #0
-; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s20, s18
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r8, #0
+; CHECK-NEXT: movlt.w r11, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s20, s18
-; CHECK-NEXT: bfc r11, #18, #14
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: lsrl r2, r11, #28
+; CHECK-NEXT: bfc r7, #18, #14
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: lsrl r2, r7, #28
; CHECK-NEXT: bfc r5, #18, #14
+; CHECK-NEXT: bfc r4, #18, #14
; CHECK-NEXT: vcvtt.f32.f16 s16, s16
-; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: lsr.w r0, r7, #10
-; CHECK-NEXT: bfc r7, #18, #14
-; CHECK-NEXT: lsll r10, r7, #22
-; CHECK-NEXT: orr.w r1, r11, r7
-; CHECK-NEXT: str.w r1, [r4, #45]
-; CHECK-NEXT: orr.w r1, r2, r10
-; CHECK-NEXT: str.w r1, [r4, #41]
-; CHECK-NEXT: strb.w r0, [r4, #49]
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: str.w r0, [r10]
+; CHECK-NEXT: lsr.w r0, r9, #10
+; CHECK-NEXT: bfc r9, #18, #14
+; CHECK-NEXT: lsll r8, r9, #22
+; CHECK-NEXT: orr.w r1, r7, r9
+; CHECK-NEXT: str.w r1, [r10, #45]
+; CHECK-NEXT: orr.w r1, r2, r8
+; CHECK-NEXT: str.w r1, [r10, #41]
+; CHECK-NEXT: strb.w r0, [r10, #49]
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: lsrl r0, r5, #14
-; CHECK-NEXT: mov r7, r4
-; CHECK-NEXT: orr.w r1, r5, r9, lsl #4
-; CHECK-NEXT: str.w r1, [r4, #37]
-; CHECK-NEXT: str.w r0, [r4, #33]
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: bfc r0, #18, #14
-; CHECK-NEXT: orr.w r0, r0, r6, lsl #18
-; CHECK-NEXT: str.w r0, [r4, #29]
+; CHECK-NEXT: orr.w r1, r5, r6, lsl #4
+; CHECK-NEXT: str.w r1, [r10, #37]
+; CHECK-NEXT: str.w r0, [r10, #33]
+; CHECK-NEXT: orr.w r0, r4, r2, lsl #18
+; CHECK-NEXT: str.w r0, [r10, #29]
; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: ldr.w r9, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: bfc r1, #18, #14
; CHECK-NEXT: bfc r3, #18, #14
-; CHECK-NEXT: mov r6, r9
-; CHECK-NEXT: lsll r4, r1, #22
-; CHECK-NEXT: lsrl r6, r3, #28
+; CHECK-NEXT: mov r4, r5
+; CHECK-NEXT: lsll r6, r1, #22
+; CHECK-NEXT: lsrl r4, r3, #28
; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r8, #65535
-; CHECK-NEXT: movtgt r8, #3
+; CHECK-NEXT: movwgt r11, #65535
+; CHECK-NEXT: movtgt r11, #3
; CHECK-NEXT: orrs r1, r3
-; CHECK-NEXT: str r1, [r7, #20]
+; CHECK-NEXT: str.w r1, [r10, #20]
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s16, #0
-; CHECK-NEXT: orr.w r2, r6, r4
+; CHECK-NEXT: orr.w r2, r4, r6
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s18
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: vcmp.f32 s16, s18
-; CHECK-NEXT: bfc r8, #18, #14
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #3
-; CHECK-NEXT: str r2, [r7, #16]
-; CHECK-NEXT: lsr.w r2, r5, #10
-; CHECK-NEXT: strb r2, [r7, #24]
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: str.w r2, [r10, #16]
+; CHECK-NEXT: lsr.w r2, r7, #10
; CHECK-NEXT: bfc r1, #18, #14
+; CHECK-NEXT: strb.w r2, [r10, #24]
+; CHECK-NEXT: bfc r11, #18, #14
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: orr.w r0, r11, r0, lsl #18
; CHECK-NEXT: lsrl r2, r1, #14
-; CHECK-NEXT: orr.w r0, r8, r0, lsl #18
-; CHECK-NEXT: orr.w r1, r1, r9, lsl #4
-; CHECK-NEXT: strd r2, r1, [r7, #8]
-; CHECK-NEXT: str r0, [r7, #4]
+; CHECK-NEXT: orr.w r1, r1, r5, lsl #4
+; CHECK-NEXT: strd r2, r1, [r10, #8]
+; CHECK-NEXT: str.w r0, [r10, #4]
; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: add sp, #4
@@ -3541,139 +3482,125 @@ define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) {
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vcvtt.f32.f16 s22, s19
-; CHECK-NEXT: vmov r0, s22
+; CHECK-NEXT: vcvtb.f32.f16 s20, s18
+; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: vcvtb.f32.f16 s26, s19
-; CHECK-NEXT: mov r9, r0
-; CHECK-NEXT: vmov r0, s26
; CHECK-NEXT: vldr s28, .LCPI49_0
-; CHECK-NEXT: vcmp.f32 s22, #0
-; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: vcmp.f32 s20, #0
+; CHECK-NEXT: vcvtb.f32.f16 s22, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcvtt.f32.f16 s20, s16
-; CHECK-NEXT: vcvtt.f32.f16 s24, s18
+; CHECK-NEXT: vcvtt.f32.f16 s16, s16
+; CHECK-NEXT: vcmp.f32 s20, s28
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: vcmp.f32 s22, s28
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vmov r6, s20
-; CHECK-NEXT: vmov r4, s24
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r8, #-1
-; CHECK-NEXT: movgt.w r9, #-1
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vcvtt.f32.f16 s18, s18
+; CHECK-NEXT: vmov r11, s18
+; CHECK-NEXT: vmov r5, s22
+; CHECK-NEXT: csinv r9, r0, zr, gt
+; CHECK-NEXT: csinv r8, r1, zr, gt
+; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: vcmp.f32 s26, #0
-; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: vcmp.f32 s16, s28
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r11, #0
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: vcmp.f32 s26, s28
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movgt.w r11, #-1
+; CHECK-NEXT: csinv r7, r0, zr, gt
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: csinv r10, r1, zr, gt
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vcmp.f32 s24, #0
-; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vcmp.f32 s22, #0
+; CHECK-NEXT: vcvtt.f32.f16 s16, s17
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: vcmp.f32 s22, s28
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: movlt r4, #0
-; CHECK-NEXT: vcmp.f32 s24, s28
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r4, #-1
-; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: vmov r5, s16
+; CHECK-NEXT: vcvtb.f32.f16 s24, s17
+; CHECK-NEXT: csinv r4, r0, zr, gt
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: csinv r6, r1, zr, gt
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: vcvtb.f32.f16 s16, s16
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: vcmp.f32 s20, #0
+; CHECK-NEXT: vcmp.f32 s18, #0
+; CHECK-NEXT: vmov q5[2], q5[0], r4, r7
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: vcmp.f32 s20, s28
+; CHECK-NEXT: vcmp.f32 s18, s28
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt.w r7, #-1
+; CHECK-NEXT: vmov q5[3], q5[1], r6, r10
+; CHECK-NEXT: vmov r4, s24
+; CHECK-NEXT: csinv r7, r0, zr, gt
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: csinv r10, r1, zr, gt
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s16, s28
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcvtt.f32.f16 s16, s17
-; CHECK-NEXT: vmov q5[2], q5[0], r0, r7
-; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: vmov q5[3], q5[1], r1, r6
+; CHECK-NEXT: csinv r6, r0, zr, gt
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: csinv r5, r1, zr, gt
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: vcmp.f32 s16, #0
-; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: vcmp.f32 s24, #0
+; CHECK-NEXT: vcvtt.f32.f16 s16, s19
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s16, s28
-; CHECK-NEXT: vcvtb.f32.f16 s16, s17
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: vcmp.f32 s24, s28
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt.w r7, #-1
+; CHECK-NEXT: csinv r2, r0, zr, gt
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: vmov q6[2], q6[0], r2, r6
+; CHECK-NEXT: csinv r4, r1, zr, gt
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s16, #0
+; CHECK-NEXT: vcvtb.f32.f16 s30, s19
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s16, s28
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: vmov r2, s30
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcvtb.f32.f16 s16, s18
-; CHECK-NEXT: vmov q6[2], q6[0], r0, r7
-; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: vmov q6[3], q6[1], r1, r6
+; CHECK-NEXT: vmov q6[3], q6[1], r4, r5
+; CHECK-NEXT: vmov q4[2], q4[0], r9, r7
+; CHECK-NEXT: vmov q4[3], q4[1], r8, r10
+; CHECK-NEXT: csinv r5, r0, zr, gt
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: csinv r4, r1, zr, gt
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: vcmp.f32 s16, #0
-; CHECK-NEXT: vmov q3[2], q3[0], r10, r9
+; CHECK-NEXT: vcmp.f32 s30, #0
+; CHECK-NEXT: vmov q0, q5
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s30, s28
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s16, s28
-; CHECK-NEXT: vmov q3[3], q3[1], r11, r8
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: vmov q2[2], q2[0], r0, r5
-; CHECK-NEXT: vmov q0, q5
-; CHECK-NEXT: vmov q2[3], q2[1], r1, r4
; CHECK-NEXT: vmov q1, q6
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; CHECK-NEXT: vmov q2, q4
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: vmov q3[2], q3[0], r0, r5
+; CHECK-NEXT: csinv r0, r1, zr, gt
+; CHECK-NEXT: vmov q3[3], q3[1], r0, r4
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 2
@@ -3693,258 +3620,248 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12}
-; CHECK-NEXT: .pad #56
-; CHECK-NEXT: sub sp, #56
+; CHECK-NEXT: .pad #80
+; CHECK-NEXT: sub sp, #80
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vcvtb.f32.f16 s22, s17
+; CHECK-NEXT: vcvtt.f32.f16 s22, s16
; CHECK-NEXT: vmov r0, s22
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcvtb.f32.f16 s24, s18
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: vldr s20, .LCPI50_0
; CHECK-NEXT: vcmp.f32 s22, #0
-; CHECK-NEXT: mov r9, r1
-; CHECK-NEXT: mov r10, r2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcvtb.f32.f16 s24, s17
+; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: movlt.w r8, #0
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov r7, s24
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r8, #-1
-; CHECK-NEXT: movgt.w r9, #-1
-; CHECK-NEXT: movgt.w r10, #-1
+; CHECK-NEXT: str r3, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: str r0, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: str r2, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT: str r1, [sp, #52] @ 4-byte Spill
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcvtb.f32.f16 s22, s19
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vmov r0, s22
-; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: vcmp.f32 s24, #0
-; CHECK-NEXT: mov r7, r2
+; CHECK-NEXT: vcvtt.f32.f16 s22, s17
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #48] @ 4-byte Spill
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt.w r7, #-1
+; CHECK-NEXT: str r3, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r1, zr, gt
+; CHECK-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r2, zr, gt
+; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: vmov r0, s22
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s22, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt r3, #0
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r3, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: str.w r2, [r4, #83]
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: str.w r1, [r4, #79]
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcvtt.f32.f16 s22, s16
-; CHECK-NEXT: str.w r0, [r4, #75]
-; CHECK-NEXT: vmov r0, s22
-; CHECK-NEXT: str.w r7, [r4, #58]
-; CHECK-NEXT: str.w r6, [r4, #54]
-; CHECK-NEXT: str.w r5, [r4, #50]
-; CHECK-NEXT: str.w r10, [r4, #33]
-; CHECK-NEXT: str.w r9, [r4, #29]
-; CHECK-NEXT: str.w r8, [r4, #25]
+; CHECK-NEXT: vcvtb.f32.f16 s22, s18
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: str r0, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT: vmov r0, s22
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s22, #0
+; CHECK-NEXT: vcvtt.f32.f16 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r3, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcvtt.f32.f16 s22, s17
-; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT: vmov r0, s22
+; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r1, zr, gt
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: csinv r0, r2, zr, gt
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcmp.f32 s22, #0
-; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: vcmp.f32 s18, #0
+; CHECK-NEXT: mov r6, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
-; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s22, s20
-; CHECK-NEXT: vcvtt.f32.f16 s18, s18
+; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcvtb.f32.f16 s18, s19
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: movgt r6, #15
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: str r0, [sp, #68] @ 4-byte Spill
; CHECK-NEXT: vmov r0, s18
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: csinv r9, r1, zr, gt
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s18, #0
-; CHECK-NEXT: mov r9, r1
-; CHECK-NEXT: mov r8, r2
+; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: movlt r3, #0
+; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r8, #-1
-; CHECK-NEXT: movgt.w r9, #-1
-; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: vcvtt.f32.f16 s18, s19
-; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r7, #15
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r8, r1, zr, gt
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: csinv r11, r2, zr, gt
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcvtb.f32.f16 s16, s16
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: vcmp.f32 s18, #0
-; CHECK-NEXT: mov r10, r2
-; CHECK-NEXT: mov r7, r3
+; CHECK-NEXT: mov r5, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r11, #0
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: itttt lt
+; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt r7, #15
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt.w r11, #-1
+; CHECK-NEXT: vcvtb.f32.f16 s16, s16
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: csinv r10, r2, zr, gt
+; CHECK-NEXT: orr.w r1, r1, r10, lsl #4
+; CHECK-NEXT: str.w r1, [r4, #95]
+; CHECK-NEXT: str.w r0, [r4, #91]
+; CHECK-NEXT: str.w r11, [r4, #83]
+; CHECK-NEXT: str.w r8, [r4, #79]
+; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT: str.w r0, [r4, #75]
+; CHECK-NEXT: ldr r0, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r8, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: lsrl r0, r9, #28
+; CHECK-NEXT: orr.w r1, r9, r8, lsl #4
+; CHECK-NEXT: str.w r1, [r4, #70]
+; CHECK-NEXT: str.w r0, [r4, #66]
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: str.w r0, [r4, #58]
+; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: str.w r0, [r4, #54]
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: str.w r0, [r4, #50]
+; CHECK-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r11, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: orr.w r1, r1, r11, lsl #4
+; CHECK-NEXT: str.w r1, [r4, #45]
+; CHECK-NEXT: str.w r0, [r4, #41]
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: str.w r0, [r4, #33]
+; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: str.w r0, [r4, #29]
+; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: str.w r0, [r4, #25]
+; CHECK-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r9, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: orr.w r1, r1, r9, lsl #4
+; CHECK-NEXT: strd r0, r1, [r4, #16]
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r5, #15
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r3, #0
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
-; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: str r2, [r4, #8]
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: csinv r0, r0, zr, gt
; CHECK-NEXT: str r1, [r4, #4]
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: lsrl r0, r11, #28
-; CHECK-NEXT: and r1, r7, #15
-; CHECK-NEXT: str.w r0, [r4, #91]
-; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r2
-; CHECK-NEXT: lsrl r0, r9, #28
-; CHECK-NEXT: str.w r0, [r4, #66]
-; CHECK-NEXT: ldr.w lr, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r0, lr
-; CHECK-NEXT: lsrl r0, r5, #28
-; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: str.w r0, [r4, #41]
-; CHECK-NEXT: ldr.w r12, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r12
-; CHECK-NEXT: lsrl r0, r5, #28
-; CHECK-NEXT: str r0, [r4, #16]
-; CHECK-NEXT: orr.w r0, r11, r10, lsl #4
+; CHECK-NEXT: and r1, r5, #15
; CHECK-NEXT: lsrl r10, r1, #28
-; CHECK-NEXT: str.w r0, [r4, #95]
+; CHECK-NEXT: str r0, [r4]
; CHECK-NEXT: strb.w r10, [r4, #99]
-; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r6, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #87]
-; CHECK-NEXT: orr.w r0, r9, r8, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #70]
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: and r1, r0, #15
+; CHECK-NEXT: and r0, r7, #15
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
+; CHECK-NEXT: and r1, r6, #15
; CHECK-NEXT: lsrl r8, r1, #28
+; CHECK-NEXT: str.w r0, [r4, #87]
; CHECK-NEXT: strb.w r8, [r4, #74]
-; CHECK-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #68] @ 4-byte Reload
; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r2, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
; CHECK-NEXT: str.w r0, [r4, #62]
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: orr.w r0, r0, r2, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #45]
-; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
-; CHECK-NEXT: lsrl r2, r1, #28
-; CHECK-NEXT: strb.w r2, [r4, #49]
-; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: strb.w r0, [r4, #49]
+; CHECK-NEXT: ldr r0, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #76] @ 4-byte Reload
; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, lr, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
; CHECK-NEXT: str.w r0, [r4, #37]
-; CHECK-NEXT: ldr r2, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: orr.w r0, r5, r2, lsl #4
-; CHECK-NEXT: str r0, [r4, #20]
-; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #64] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
-; CHECK-NEXT: lsrl r2, r1, #28
-; CHECK-NEXT: strb r2, [r4, #24]
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: strb r0, [r4, #24]
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #15
+; CHECK-NEXT: ldr r1, [sp, #72] @ 4-byte Reload
; CHECK-NEXT: and r0, r3, #15
-; CHECK-NEXT: orr.w r0, r0, r12, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
; CHECK-NEXT: str r0, [r4, #12]
-; CHECK-NEXT: add sp, #56
+; CHECK-NEXT: add sp, #80
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -3979,44 +3896,40 @@ define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) {
; CHECK-NEXT: vcmp.f32 s24, #0
; CHECK-NEXT: add.w r12, r4, #112
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s24, s20
-; CHECK-NEXT: vcvtb.f32.f16 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: vmov r6, s26
+; CHECK-NEXT: vcvtb.f32.f16 s18, s18
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: vmov r6, s26
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s28, #0
; CHECK-NEXT: add.w r12, r4, #96
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s28, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s28, s20
-; CHECK-NEXT: vcvtt.f32.f16 s24, s17
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: vmov r7, s18
+; CHECK-NEXT: vcvtt.f32.f16 s24, s17
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: vmov r7, s18
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s26, #0
; CHECK-NEXT: add.w r12, r4, #80
@@ -4028,108 +3941,99 @@ define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) {
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: vmov r6, s24
; CHECK-NEXT: vcvtb.f32.f16 s26, s17
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
+; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
+; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s18, #0
; CHECK-NEXT: add.w r12, r4, #64
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s18, s20
-; CHECK-NEXT: vcvtt.f32.f16 s16, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: vmov r7, s26
+; CHECK-NEXT: vcvtt.f32.f16 s16, s16
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: vmov r7, s26
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s24, #0
; CHECK-NEXT: add.w r12, r4, #48
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: vmov r6, s16
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: vmov r6, s16
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s26, #0
; CHECK-NEXT: add.w r12, r4, #32
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: add.w r12, r4, #16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s22, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: csinv r0, r0, zr, gt
+; CHECK-NEXT: csinv r3, r3, zr, gt
+; CHECK-NEXT: csinv r2, r2, zr, gt
+; CHECK-NEXT: csinv r1, r1, zr, gt
; CHECK-NEXT: stm r4!, {r0, r1, r2, r3}
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEXT: add sp, #4
diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
index cba0f9cbba2ca..1b91e276c5c1b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
@@ -576,10 +576,10 @@ define i32 @arm_nn_mat_mul_core_4x_s8(i32 %row_elements, i32 %offset, ptr %row_b
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: mov.w r10, #0
-; CHECK-NEXT: movs r6, #0
; CHECK-NEXT: adds r7, r2, r1
; CHECK-NEXT: add.w r1, r1, r1, lsl #1
; CHECK-NEXT: add r1, r2
+; CHECK-NEXT: movs r6, #0
; CHECK-NEXT: dlstp.8 lr, r0
; CHECK-NEXT: .LBB4_2: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
index 29b56639bd769..546c39fe11f4b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
@@ -665,8 +665,7 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: lsrl r0, r1, #31
; CHECK-NEXT: subs.w r3, r0, #-1
; CHECK-NEXT: sbcs r1, r1, #0
-; CHECK-NEXT: it hs
-; CHECK-NEXT: movhs.w r0, #-1
+; CHECK-NEXT: csinv r0, r0, zr, hs
; CHECK-NEXT: str r0, [r2], #4
; CHECK-NEXT: le lr, .LBB3_7
; CHECK-NEXT: .LBB3_8: @ %for.cond.cleanup
@@ -832,8 +831,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: lsrl r0, r1, #31
; CHECK-NEXT: subs.w r2, r0, #-1
; CHECK-NEXT: sbcs r1, r1, #0
-; CHECK-NEXT: it hs
-; CHECK-NEXT: movhs.w r0, #-1
+; CHECK-NEXT: csinv r0, r0, zr, hs
; CHECK-NEXT: str r0, [r11], #4
; CHECK-NEXT: le lr, .LBB4_7
; CHECK-NEXT: .LBB4_8: @ %for.cond.cleanup
diff --git a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
index bf0d92b5e0303..7280093375f12 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
@@ -487,23 +487,20 @@ for.cond.cleanup: ; preds = %vector.body
define <2 x i64> @large_i128(<2 x double> %x) {
; CHECK-LABEL: large_i128:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: mov r8, r3
; CHECK-NEXT: mov r5, r2
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: subs r7, r2, #1
-; CHECK-NEXT: mov.w r9, #1
-; CHECK-NEXT: sbcs r7, r3, #0
; CHECK-NEXT: mov.w r4, #0
+; CHECK-NEXT: sbcs r7, r3, #0
; CHECK-NEXT: cset r7, lt
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: csel r0, r0, r7, ne
; CHECK-NEXT: csel r3, r3, r7, ne
; CHECK-NEXT: csel r1, r1, r7, ne
-; CHECK-NEXT: csel r2, r2, r9, ne
+; CHECK-NEXT: csinc r2, r2, zr, eq
; CHECK-NEXT: rsbs r7, r0, #0
; CHECK-NEXT: sbcs.w r7, r4, r1
; CHECK-NEXT: sbcs.w r2, r4, r2
@@ -522,7 +519,7 @@ define <2 x i64> @large_i128(<2 x double> %x) {
; CHECK-NEXT: csel r0, r0, r5, ne
; CHECK-NEXT: csel r3, r3, r5, ne
; CHECK-NEXT: csel r1, r1, r5, ne
-; CHECK-NEXT: csel r2, r2, r9, ne
+; CHECK-NEXT: csinc r2, r2, zr, eq
; CHECK-NEXT: rsbs r5, r0, #0
; CHECK-NEXT: sbcs.w r5, r4, r1
; CHECK-NEXT: sbcs.w r2, r4, r2
@@ -533,8 +530,7 @@ define <2 x i64> @large_i128(<2 x double> %x) {
; CHECK-NEXT: csel r3, r1, r3, ne
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
More information about the llvm-commits
mailing list