[llvm] [ARM] Switch to soft promoting half types. (PR #80440)
Harald van Dijk via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 2 07:08:51 PST 2024
https://github.com/hvdijk updated https://github.com/llvm/llvm-project/pull/80440
From b5456877f29194d82123992622c09f71e19f3c66 Mon Sep 17 00:00:00 2001
From: Harald van Dijk <harald at gigawatt.nl>
Date: Fri, 2 Feb 2024 15:08:39 +0000
Subject: [PATCH] [ARM] Switch to soft promoting half types.
The traditional promotion is known to generate wrong code.
Fixes #73805.
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 +-
llvm/lib/Target/ARM/ARMISelLowering.h | 4 +
llvm/test/CodeGen/ARM/aes-erratum-fix.ll | 1098 ++++++++---------
llvm/test/CodeGen/ARM/arm-half-promote.ll | 155 ++-
llvm/test/CodeGen/ARM/fp16-args.ll | 40 -
llvm/test/CodeGen/ARM/fp16-instructions.ll | 166 +--
llvm/test/CodeGen/ARM/fp16-promote.ll | 146 +--
llvm/test/CodeGen/ARM/llvm.exp10.ll | 63 +-
llvm/test/CodeGen/ARM/llvm.frexp.ll | 59 +-
.../vecreduce-fadd-legalization-soft-float.ll | 64 +-
.../ARM/vecreduce-fadd-legalization-strict.ll | 3 -
.../vecreduce-fmax-legalization-soft-float.ll | 55 +-
.../vecreduce-fmin-legalization-soft-float.ll | 55 +-
.../vecreduce-fmul-legalization-soft-float.ll | 32 +-
.../ARM/vecreduce-fmul-legalization-strict.ll | 3 -
llvm/test/CodeGen/Thumb2/mve-vabd.ll | 30 +-
16 files changed, 929 insertions(+), 1048 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bf8c877a547cd..b5c4a8a322ea7 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -9055,7 +9055,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
if (getTypeAction(*DAG.getContext(), EltVT) ==
- TargetLowering::TypePromoteFloat) {
+ TargetLowering::TypeSoftPromoteHalf) {
// INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
// but the type system will try to do that if we don't intervene.
// Reinterpret any such vector-element insertion as one with the
@@ -9065,7 +9065,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
assert(getTypeAction(*DAG.getContext(), IEltVT) !=
- TargetLowering::TypePromoteFloat);
+ TargetLowering::TypeSoftPromoteHalf);
SDValue VecIn = Op.getOperand(0);
EVT VecVT = VecIn.getValueType();
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index d61a68780e3e1..b13ddf697cb80 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -762,6 +762,10 @@ class VectorType;
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator = nullptr) const override;
+ bool softPromoteHalfType() const override { return true; }
+
+ bool useFPRegsForHalfType() const override { return true; }
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
index f9b62df37ff32..9c2da345956d1 100644
--- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
+++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
@@ -1355,102 +1355,89 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FIX-NOSCHED-NEXT: .pad #24
-; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT: .pad #12
+; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_3
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
-; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
-; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d16[0]}, [r1:16]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r3
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #8] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
-; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3]
+; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
+; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
-; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r10, r5
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bne .LBB36_4
; CHECK-FIX-NOSCHED-NEXT: .LBB36_2:
-; CHECK-FIX-NOSCHED-NEXT: vmov r4, r6, d1
-; CHECK-FIX-NOSCHED-NEXT: vmov r0, r3, d0
-; CHECK-FIX-NOSCHED-NEXT: lsr r5, r4, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r1, r6, #16
-; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
-; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r12, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: uxth r9, r4
-; CHECK-FIX-NOSCHED-NEXT: uxth r6, r3
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r1, d0[0]
; CHECK-FIX-NOSCHED-NEXT: b .LBB36_5
; CHECK-FIX-NOSCHED-NEXT: .LBB36_3:
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #14]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #12]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #8]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #6]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r2, #10]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r10, [r2]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #4]
+; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #8
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r2:32]
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r3:32]
+; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #4
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r3:32]
+; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #12
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r3:32]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d16[0]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2]
+; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_2
; CHECK-FIX-NOSCHED-NEXT: .LBB36_4:
-; CHECK-FIX-NOSCHED-NEXT: vmov r5, r3, d1
-; CHECK-FIX-NOSCHED-NEXT: mov r4, r7
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d0[1]
-; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d0[0]}, [r1:16]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d0[0]
-; CHECK-FIX-NOSCHED-NEXT: uxth r9, r5
-; CHECK-FIX-NOSCHED-NEXT: uxth r11, r3
-; CHECK-FIX-NOSCHED-NEXT: uxth r6, r7
-; CHECK-FIX-NOSCHED-NEXT: lsr r12, r7, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r1, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r5, r5, #16
-; CHECK-FIX-NOSCHED-NEXT: mov r7, r4
-; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3]
+; CHECK-FIX-NOSCHED-NEXT: ldrh r1, [r1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1]
; CHECK-FIX-NOSCHED-NEXT: .LBB36_5:
-; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r6, r12, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r9, r5, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r1, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r8, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r3, r7, r3, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r4, r0, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r1
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, lr, r12, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r3
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r11, r10, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r1
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r6, r5, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r1
+; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r9, r1, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r1
+; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_ptr:
@@ -1460,94 +1447,79 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
; CHECK-CORTEX-FIX-NEXT: .pad #24
; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: beq .LBB36_3
+; CHECK-CORTEX-FIX-NEXT: beq .LBB36_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
-; CHECK-CORTEX-FIX-NEXT: vorr q9, q8, q8
-; CHECK-CORTEX-FIX-NEXT: vld1.16 {d18[0]}, [r1:16]
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d18[0]
-; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: vmov r3, r6, d17
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r11, r6
-; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: bne .LBB36_4
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: mov r3, r6
+; CHECK-CORTEX-FIX-NEXT: b .LBB36_3
; CHECK-CORTEX-FIX-NEXT: .LBB36_2:
-; CHECK-CORTEX-FIX-NEXT: vmov r1, r7, d0
-; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
-; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r12, r7, #16
-; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
-; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: mov r0, r3
-; CHECK-CORTEX-FIX-NEXT: vmov r7, r3, d1
-; CHECK-CORTEX-FIX-NEXT: uxth r10, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r5, r7, #16
-; CHECK-CORTEX-FIX-NEXT: uxth lr, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r8, r3, #16
-; CHECK-CORTEX-FIX-NEXT: mov r3, r0
-; CHECK-CORTEX-FIX-NEXT: b .LBB36_5
-; CHECK-CORTEX-FIX-NEXT: .LBB36_3:
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2]
-; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r2, #12]
-; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r2, #14]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #2]
+; CHECK-CORTEX-FIX-NEXT: add r3, r2, #8
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r2:32]
+; CHECK-CORTEX-FIX-NEXT: add r7, r2, #4
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r3:32]
+; CHECK-CORTEX-FIX-NEXT: add r3, r2, #12
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r7:32]
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r3:32]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[0]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #4]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[1]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2]
+; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #6]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #8]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #10]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0]
+; CHECK-CORTEX-FIX-NEXT: .LBB36_3:
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d17[3]
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: beq .LBB36_2
-; CHECK-CORTEX-FIX-NEXT: .LBB36_4:
-; CHECK-CORTEX-FIX-NEXT: vorr q8, q0, q0
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r5, d0[1]
-; CHECK-CORTEX-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
-; CHECK-CORTEX-FIX-NEXT: uxth r6, r5
-; CHECK-CORTEX-FIX-NEXT: lsr r12, r5, #16
-; CHECK-CORTEX-FIX-NEXT: vmov r5, r7, d1
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r1, d16[0]
-; CHECK-CORTEX-FIX-NEXT: uxth r10, r5
-; CHECK-CORTEX-FIX-NEXT: lsr r5, r5, #16
-; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
-; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
-; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: beq .LBB36_5
+; CHECK-CORTEX-FIX-NEXT: @ %bb.4:
+; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r1]
+; CHECK-CORTEX-FIX-NEXT: b .LBB36_6
; CHECK-CORTEX-FIX-NEXT: .LBB36_5:
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r0, d0[0]
+; CHECK-CORTEX-FIX-NEXT: .LBB36_6:
+; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r4, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r10, r5, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r9, r7, r4, lsl #16
+; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d0[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d0[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d0[3]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d1[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d1[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d1[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d1[3]
; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r1, r3, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r11, r5, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r3, r1, lsl #16
+; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
-; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r9, lsl #16
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r9
+; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r10, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6
@@ -1604,210 +1576,179 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FIX-NOSCHED-NEXT: .pad #24
-; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
-; CHECK-FIX-NOSCHED-NEXT: vmov r12, s0
+; CHECK-FIX-NOSCHED-NEXT: .pad #12
+; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
-; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
-; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r12
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
-; CHECK-FIX-NOSCHED-NEXT: uxth r2, r3
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
-; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r3, r5
-; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s2, s0
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: b .LBB37_3
; CHECK-FIX-NOSCHED-NEXT: .LBB37_2:
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #14]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #12]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #8]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #6]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #2]
+; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #8
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r1:32]
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r2:32]
+; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #4
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r2:32]
+; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #12
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r2:32]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1]
; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1, #10]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #4]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov s2, r2
; CHECK-FIX-NOSCHED-NEXT: .LBB37_3:
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d3[3]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d3[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d3[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d3[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d2[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d2[2]
; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_5
; CHECK-FIX-NOSCHED-NEXT: @ %bb.4:
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r6, d2[1]
-; CHECK-FIX-NOSCHED-NEXT: mov r3, r2
-; CHECK-FIX-NOSCHED-NEXT: mov r2, r7
-; CHECK-FIX-NOSCHED-NEXT: vmov r4, r7, d3
-; CHECK-FIX-NOSCHED-NEXT: vmov.16 d2[0], r12
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d2[0]
-; CHECK-FIX-NOSCHED-NEXT: uxth r5, r6
-; CHECK-FIX-NOSCHED-NEXT: lsr r12, r6, #16
-; CHECK-FIX-NOSCHED-NEXT: uxth r10, r4
-; CHECK-FIX-NOSCHED-NEXT: uxth r11, r7
-; CHECK-FIX-NOSCHED-NEXT: lsr r9, r7, #16
-; CHECK-FIX-NOSCHED-NEXT: mov r7, r2
-; CHECK-FIX-NOSCHED-NEXT: mov r2, r3
-; CHECK-FIX-NOSCHED-NEXT: lsr r4, r4, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1]
; CHECK-FIX-NOSCHED-NEXT: b .LBB37_6
; CHECK-FIX-NOSCHED-NEXT: .LBB37_5:
-; CHECK-FIX-NOSCHED-NEXT: vmov r3, r6, d3
-; CHECK-FIX-NOSCHED-NEXT: vmov r0, r5, d2
-; CHECK-FIX-NOSCHED-NEXT: lsr r4, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r9, r6, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r12, r5, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
-; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
-; CHECK-FIX-NOSCHED-NEXT: uxth r10, r3
-; CHECK-FIX-NOSCHED-NEXT: uxth r5, r5
+; CHECK-FIX-NOSCHED-NEXT: mov r0, lr
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d2[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov s0, lr
+; CHECK-FIX-NOSCHED-NEXT: mov lr, r0
; CHECK-FIX-NOSCHED-NEXT: .LBB37_6:
-; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov r0, s0
+; CHECK-FIX-NOSCHED-NEXT: vmov r6, s2
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r12, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r6, r6, r8, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r12, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r4, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r6
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r4, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r2, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r9, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r9, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, lr, r7, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
-; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_val:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-CORTEX-FIX-NEXT: .pad #28
-; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #28
-; CHECK-CORTEX-FIX-NEXT: vmov r2, s0
+; CHECK-CORTEX-FIX-NEXT: .pad #12
+; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #12
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: beq .LBB37_2
+; CHECK-CORTEX-FIX-NEXT: beq .LBB37_3
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
-; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r2
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r7, d16[0]
-; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
-; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #24] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: vmov r3, r7, d17
-; CHECK-CORTEX-FIX-NEXT: uxth r6, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r11, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
-; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: b .LBB37_3
+; CHECK-CORTEX-FIX-NEXT: vmov.f32 s2, s0
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3]
+; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[0]
+; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[1]
+; CHECK-CORTEX-FIX-NEXT: str r2, [sp] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
+; CHECK-CORTEX-FIX-NEXT: bne .LBB37_4
; CHECK-CORTEX-FIX-NEXT: .LBB37_2:
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1]
-; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r1, #12]
-; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r1, #14]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #24] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #2]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #4]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d2[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3]
+; CHECK-CORTEX-FIX-NEXT: vmov s0, lr
+; CHECK-CORTEX-FIX-NEXT: b .LBB37_5
+; CHECK-CORTEX-FIX-NEXT: .LBB37_3:
+; CHECK-CORTEX-FIX-NEXT: add r2, r1, #8
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
+; CHECK-CORTEX-FIX-NEXT: add r3, r1, #4
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r2:32]
+; CHECK-CORTEX-FIX-NEXT: add r2, r1, #12
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r3:32]
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r2:32]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #6]
+; CHECK-CORTEX-FIX-NEXT: vmov s2, r2
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #8]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #10]
-; CHECK-CORTEX-FIX-NEXT: .LBB37_3:
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: beq .LBB37_5
-; CHECK-CORTEX-FIX-NEXT: @ %bb.4:
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d2[1]
-; CHECK-CORTEX-FIX-NEXT: vmov.16 d2[0], r2
-; CHECK-CORTEX-FIX-NEXT: vmov r4, r6, d3
-; CHECK-CORTEX-FIX-NEXT: uxth r10, r4
-; CHECK-CORTEX-FIX-NEXT: lsr r4, r4, #16
-; CHECK-CORTEX-FIX-NEXT: uxth lr, r6
-; CHECK-CORTEX-FIX-NEXT: lsr r8, r6, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d2[0]
-; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
-; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
-; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: b .LBB37_6
+; CHECK-CORTEX-FIX-NEXT: beq .LBB37_2
+; CHECK-CORTEX-FIX-NEXT: .LBB37_4:
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3]
; CHECK-CORTEX-FIX-NEXT: .LBB37_5:
-; CHECK-CORTEX-FIX-NEXT: vmov r2, r3, d2
-; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
-; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
-; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: mov r0, r7
-; CHECK-CORTEX-FIX-NEXT: vmov r6, r7, d3
-; CHECK-CORTEX-FIX-NEXT: uxth r10, r6
-; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
-; CHECK-CORTEX-FIX-NEXT: mov r7, r0
-; CHECK-CORTEX-FIX-NEXT: .LBB37_6:
-; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r7, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r11, r6, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r7, r10, lsl #16
+; CHECK-CORTEX-FIX-NEXT: ldm sp, {r6, r7} @ 8-byte Folded Reload
+; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r10, r4, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r2, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r2, r3, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r6, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r3
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r2
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
-; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r9, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r9, r2, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r7, r6, lsl #16
+; CHECK-CORTEX-FIX-NEXT: vmov r7, s2
+; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16
+; CHECK-CORTEX-FIX-NEXT: vmov r6, s0
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r7
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r4
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r0
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], lr
+; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r8, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r4
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r5
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r2
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r3
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r5
; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
-; CHECK-CORTEX-FIX-NEXT: add sp, sp, #28
+; CHECK-CORTEX-FIX-NEXT: add sp, sp, #12
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
br i1 %0, label %5, label %11
@@ -3567,102 +3508,89 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FIX-NOSCHED-NEXT: .pad #24
-; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT: .pad #12
+; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_3
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
-; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
-; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d16[0]}, [r1:16]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r3
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #8] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
-; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3]
+; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
+; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
-; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r10, r5
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bne .LBB82_4
; CHECK-FIX-NOSCHED-NEXT: .LBB82_2:
-; CHECK-FIX-NOSCHED-NEXT: vmov r4, r6, d1
-; CHECK-FIX-NOSCHED-NEXT: vmov r0, r3, d0
-; CHECK-FIX-NOSCHED-NEXT: lsr r5, r4, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r1, r6, #16
-; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
-; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r12, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: uxth r9, r4
-; CHECK-FIX-NOSCHED-NEXT: uxth r6, r3
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r1, d0[0]
; CHECK-FIX-NOSCHED-NEXT: b .LBB82_5
; CHECK-FIX-NOSCHED-NEXT: .LBB82_3:
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #14]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #12]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #8]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #6]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r2, #10]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r10, [r2]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #4]
+; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #8
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r2:32]
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r3:32]
+; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #4
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r3:32]
+; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #12
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r3:32]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d16[0]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2]
+; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_2
; CHECK-FIX-NOSCHED-NEXT: .LBB82_4:
-; CHECK-FIX-NOSCHED-NEXT: vmov r5, r3, d1
-; CHECK-FIX-NOSCHED-NEXT: mov r4, r7
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d0[1]
-; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d0[0]}, [r1:16]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d0[0]
-; CHECK-FIX-NOSCHED-NEXT: uxth r9, r5
-; CHECK-FIX-NOSCHED-NEXT: uxth r11, r3
-; CHECK-FIX-NOSCHED-NEXT: uxth r6, r7
-; CHECK-FIX-NOSCHED-NEXT: lsr r12, r7, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r1, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r5, r5, #16
-; CHECK-FIX-NOSCHED-NEXT: mov r7, r4
-; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3]
+; CHECK-FIX-NOSCHED-NEXT: ldrh r1, [r1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1]
; CHECK-FIX-NOSCHED-NEXT: .LBB82_5:
-; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r6, r12, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r9, r5, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r1, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r8, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r3, r7, r3, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r4, r0, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r1
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, lr, r12, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r3
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r11, r10, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r1
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r6, r5, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r1
+; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r9, r1, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r1
+; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_ptr:
@@ -3672,94 +3600,79 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
; CHECK-CORTEX-FIX-NEXT: .pad #24
; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: beq .LBB82_3
+; CHECK-CORTEX-FIX-NEXT: beq .LBB82_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
-; CHECK-CORTEX-FIX-NEXT: vorr q9, q8, q8
-; CHECK-CORTEX-FIX-NEXT: vld1.16 {d18[0]}, [r1:16]
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d18[0]
-; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: vmov r3, r6, d17
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r11, r6
-; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: bne .LBB82_4
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: mov r3, r6
+; CHECK-CORTEX-FIX-NEXT: b .LBB82_3
; CHECK-CORTEX-FIX-NEXT: .LBB82_2:
-; CHECK-CORTEX-FIX-NEXT: vmov r1, r7, d0
-; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
-; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r12, r7, #16
-; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
-; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: mov r0, r3
-; CHECK-CORTEX-FIX-NEXT: vmov r7, r3, d1
-; CHECK-CORTEX-FIX-NEXT: uxth r10, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r5, r7, #16
-; CHECK-CORTEX-FIX-NEXT: uxth lr, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r8, r3, #16
-; CHECK-CORTEX-FIX-NEXT: mov r3, r0
-; CHECK-CORTEX-FIX-NEXT: b .LBB82_5
-; CHECK-CORTEX-FIX-NEXT: .LBB82_3:
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2]
-; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r2, #12]
-; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r2, #14]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #2]
+; CHECK-CORTEX-FIX-NEXT: add r3, r2, #8
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r2:32]
+; CHECK-CORTEX-FIX-NEXT: add r7, r2, #4
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r3:32]
+; CHECK-CORTEX-FIX-NEXT: add r3, r2, #12
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r7:32]
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r3:32]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[0]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #4]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[1]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2]
+; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #6]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #8]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #10]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0]
+; CHECK-CORTEX-FIX-NEXT: .LBB82_3:
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d17[3]
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: beq .LBB82_2
-; CHECK-CORTEX-FIX-NEXT: .LBB82_4:
-; CHECK-CORTEX-FIX-NEXT: vorr q8, q0, q0
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r5, d0[1]
-; CHECK-CORTEX-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
-; CHECK-CORTEX-FIX-NEXT: uxth r6, r5
-; CHECK-CORTEX-FIX-NEXT: lsr r12, r5, #16
-; CHECK-CORTEX-FIX-NEXT: vmov r5, r7, d1
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r1, d16[0]
-; CHECK-CORTEX-FIX-NEXT: uxth r10, r5
-; CHECK-CORTEX-FIX-NEXT: lsr r5, r5, #16
-; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
-; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
-; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: beq .LBB82_5
+; CHECK-CORTEX-FIX-NEXT: @ %bb.4:
+; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r1]
+; CHECK-CORTEX-FIX-NEXT: b .LBB82_6
; CHECK-CORTEX-FIX-NEXT: .LBB82_5:
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r0, d0[0]
+; CHECK-CORTEX-FIX-NEXT: .LBB82_6:
+; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r4, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r10, r5, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r9, r7, r4, lsl #16
+; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d0[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d0[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d0[3]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d1[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d1[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d1[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d1[3]
; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r1, r3, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r11, r5, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r3, r1, lsl #16
+; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
-; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r9, lsl #16
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r9
+; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r10, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6
@@ -3816,210 +3729,179 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <1
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FIX-NOSCHED-NEXT: .pad #24
-; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
-; CHECK-FIX-NOSCHED-NEXT: vmov r12, s0
+; CHECK-FIX-NOSCHED-NEXT: .pad #12
+; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
-; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
-; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r12
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
-; CHECK-FIX-NOSCHED-NEXT: uxth r2, r3
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
-; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r3, r5
-; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s2, s0
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: b .LBB83_3
; CHECK-FIX-NOSCHED-NEXT: .LBB83_2:
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #14]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #12]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #8]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #6]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #2]
+; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #8
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r1:32]
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r2:32]
+; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #4
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r2:32]
+; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #12
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r2:32]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1]
; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1, #10]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #4]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov s2, r2
; CHECK-FIX-NOSCHED-NEXT: .LBB83_3:
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d3[3]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d3[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d3[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d3[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d2[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d2[2]
; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_5
; CHECK-FIX-NOSCHED-NEXT: @ %bb.4:
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r6, d2[1]
-; CHECK-FIX-NOSCHED-NEXT: mov r3, r2
-; CHECK-FIX-NOSCHED-NEXT: mov r2, r7
-; CHECK-FIX-NOSCHED-NEXT: vmov r4, r7, d3
-; CHECK-FIX-NOSCHED-NEXT: vmov.16 d2[0], r12
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d2[0]
-; CHECK-FIX-NOSCHED-NEXT: uxth r5, r6
-; CHECK-FIX-NOSCHED-NEXT: lsr r12, r6, #16
-; CHECK-FIX-NOSCHED-NEXT: uxth r10, r4
-; CHECK-FIX-NOSCHED-NEXT: uxth r11, r7
-; CHECK-FIX-NOSCHED-NEXT: lsr r9, r7, #16
-; CHECK-FIX-NOSCHED-NEXT: mov r7, r2
-; CHECK-FIX-NOSCHED-NEXT: mov r2, r3
-; CHECK-FIX-NOSCHED-NEXT: lsr r4, r4, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1]
; CHECK-FIX-NOSCHED-NEXT: b .LBB83_6
; CHECK-FIX-NOSCHED-NEXT: .LBB83_5:
-; CHECK-FIX-NOSCHED-NEXT: vmov r3, r6, d3
-; CHECK-FIX-NOSCHED-NEXT: vmov r0, r5, d2
-; CHECK-FIX-NOSCHED-NEXT: lsr r4, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r9, r6, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r12, r5, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
-; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
-; CHECK-FIX-NOSCHED-NEXT: uxth r10, r3
-; CHECK-FIX-NOSCHED-NEXT: uxth r5, r5
+; CHECK-FIX-NOSCHED-NEXT: mov r0, lr
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d2[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov s0, lr
+; CHECK-FIX-NOSCHED-NEXT: mov lr, r0
; CHECK-FIX-NOSCHED-NEXT: .LBB83_6:
-; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov r0, s0
+; CHECK-FIX-NOSCHED-NEXT: vmov r6, s2
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r12, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r6, r6, r8, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r12, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r4, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r6
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r4, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r2, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r9, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r9, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, lr, r7, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
-; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_val:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-CORTEX-FIX-NEXT: .pad #28
-; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #28
-; CHECK-CORTEX-FIX-NEXT: vmov r2, s0
+; CHECK-CORTEX-FIX-NEXT: .pad #12
+; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #12
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: beq .LBB83_2
+; CHECK-CORTEX-FIX-NEXT: beq .LBB83_3
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
-; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r2
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r7, d16[0]
-; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
-; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #24] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: vmov r3, r7, d17
-; CHECK-CORTEX-FIX-NEXT: uxth r6, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r11, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
-; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: b .LBB83_3
+; CHECK-CORTEX-FIX-NEXT: vmov.f32 s2, s0
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3]
+; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[0]
+; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[1]
+; CHECK-CORTEX-FIX-NEXT: str r2, [sp] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
+; CHECK-CORTEX-FIX-NEXT: bne .LBB83_4
; CHECK-CORTEX-FIX-NEXT: .LBB83_2:
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1]
-; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r1, #12]
-; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r1, #14]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #24] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #2]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #4]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d2[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3]
+; CHECK-CORTEX-FIX-NEXT: vmov s0, lr
+; CHECK-CORTEX-FIX-NEXT: b .LBB83_5
+; CHECK-CORTEX-FIX-NEXT: .LBB83_3:
+; CHECK-CORTEX-FIX-NEXT: add r2, r1, #8
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
+; CHECK-CORTEX-FIX-NEXT: add r3, r1, #4
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r2:32]
+; CHECK-CORTEX-FIX-NEXT: add r2, r1, #12
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r3:32]
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r2:32]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #6]
+; CHECK-CORTEX-FIX-NEXT: vmov s2, r2
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #8]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #10]
-; CHECK-CORTEX-FIX-NEXT: .LBB83_3:
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: beq .LBB83_5
-; CHECK-CORTEX-FIX-NEXT: @ %bb.4:
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d2[1]
-; CHECK-CORTEX-FIX-NEXT: vmov.16 d2[0], r2
-; CHECK-CORTEX-FIX-NEXT: vmov r4, r6, d3
-; CHECK-CORTEX-FIX-NEXT: uxth r10, r4
-; CHECK-CORTEX-FIX-NEXT: lsr r4, r4, #16
-; CHECK-CORTEX-FIX-NEXT: uxth lr, r6
-; CHECK-CORTEX-FIX-NEXT: lsr r8, r6, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d2[0]
-; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
-; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
-; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: b .LBB83_6
+; CHECK-CORTEX-FIX-NEXT: beq .LBB83_2
+; CHECK-CORTEX-FIX-NEXT: .LBB83_4:
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3]
; CHECK-CORTEX-FIX-NEXT: .LBB83_5:
-; CHECK-CORTEX-FIX-NEXT: vmov r2, r3, d2
-; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
-; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
-; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: mov r0, r7
-; CHECK-CORTEX-FIX-NEXT: vmov r6, r7, d3
-; CHECK-CORTEX-FIX-NEXT: uxth r10, r6
-; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
-; CHECK-CORTEX-FIX-NEXT: mov r7, r0
-; CHECK-CORTEX-FIX-NEXT: .LBB83_6:
-; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r7, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r11, r6, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r7, r10, lsl #16
+; CHECK-CORTEX-FIX-NEXT: ldm sp, {r6, r7} @ 8-byte Folded Reload
+; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r10, r4, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r2, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r2, r3, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r6, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r3
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r2
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
-; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r9, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r9, r2, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r7, r6, lsl #16
+; CHECK-CORTEX-FIX-NEXT: vmov r7, s2
+; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16
+; CHECK-CORTEX-FIX-NEXT: vmov r6, s0
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r7
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r4
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r0
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], lr
+; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r8, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r4
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r5
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r2
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r3
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r5
; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
-; CHECK-CORTEX-FIX-NEXT: add sp, sp, #28
+; CHECK-CORTEX-FIX-NEXT: add sp, sp, #12
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
br i1 %0, label %5, label %11
diff --git a/llvm/test/CodeGen/ARM/arm-half-promote.ll b/llvm/test/CodeGen/ARM/arm-half-promote.ll
index d6a8a9b9538f1..e1ab75b2ac7f1 100644
--- a/llvm/test/CodeGen/ARM/arm-half-promote.ll
+++ b/llvm/test/CodeGen/ARM/arm-half-promote.ll
@@ -2,78 +2,113 @@
define arm_aapcs_vfpcc { <8 x half>, <8 x half> } @f1() {
; CHECK-LABEL: _f1
-; CHECK: vpush {d8}
-; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
-; CHECK-NEXT: vmov.i32 d8, #0x0
-; CHECK-NEXT: vmov.i32 d0, #0x0
-; CHECK-NEXT: vmov.i32 d1, #0x0
-; CHECK-NEXT: vmov.i32 d2, #0x0
-; CHECK-NEXT: vmov.i32 d3, #0x0
-; CHECK-NEXT: vmov.i32 d4, #0x0
-; CHECK-NEXT: vmov.i32 d5, #0x0
-; CHECK-NEXT: vmov.i32 d6, #0x0
-; CHECK-NEXT: vmov.i32 d7, #0x0
-; CHECK-NEXT: vmov.f32 s1, s16
-; CHECK-NEXT: vmov.f32 s3, s16
-; CHECK-NEXT: vmov.f32 s5, s16
-; CHECK-NEXT: vmov.f32 s7, s16
-; CHECK-NEXT: vmov.f32 s9, s16
-; CHECK-NEXT: vmov.f32 s11, s16
-; CHECK-NEXT: vmov.f32 s13, s16
-; CHECK-NEXT: vmov.f32 s15, s16
-; CHECK-NEXT: vpop {d8}
+; CHECK: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov.i32 q8, #0x0
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vmov d4, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d16[1]
+; CHECK-NEXT: vmov d8, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d16[2]
+; CHECK-NEXT: vmov d5, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d16[3]
+; CHECK-NEXT: vmov d9, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[0]
+; CHECK-NEXT: vmov d6, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[1]
+; CHECK-NEXT: vmov d10, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[2]
+; CHECK-NEXT: vmov d7, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[3]
+; CHECK-NEXT: vmov d11, r0, r0
+; CHECK: vmov.f32 s0, s8
+; CHECK: vmov.f32 s1, s16
+; CHECK: vmov.f32 s2, s10
+; CHECK: vmov.f32 s3, s18
+; CHECK: vmov.f32 s4, s12
+; CHECK: vmov.f32 s5, s20
+; CHECK: vmov.f32 s6, s14
+; CHECK: vmov.f32 s7, s22
+; CHECK: vmov.f32 s9, s16
+; CHECK: vmov.f32 s11, s18
+; CHECK: vmov.f32 s13, s20
+; CHECK: vmov.f32 s15, s22
+; CHECK: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr
+
ret { <8 x half>, <8 x half> } zeroinitializer
}
define swiftcc { <8 x half>, <8 x half> } @f2() {
; CHECK-LABEL: _f2
-; CHECK: vpush {d8}
-; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
-; CHECK-NEXT: vmov.i32 d8, #0x0
-; CHECK-NEXT: vmov.i32 d0, #0x0
-; CHECK-NEXT: vmov.i32 d1, #0x0
-; CHECK-NEXT: vmov.i32 d2, #0x0
-; CHECK-NEXT: vmov.i32 d3, #0x0
-; CHECK-NEXT: vmov.i32 d4, #0x0
-; CHECK-NEXT: vmov.i32 d5, #0x0
-; CHECK-NEXT: vmov.i32 d6, #0x0
-; CHECK-NEXT: vmov.i32 d7, #0x0
-; CHECK-NEXT: vmov.f32 s1, s16
-; CHECK-NEXT: vmov.f32 s3, s16
-; CHECK-NEXT: vmov.f32 s5, s16
-; CHECK-NEXT: vmov.f32 s7, s16
-; CHECK-NEXT: vmov.f32 s9, s16
-; CHECK-NEXT: vmov.f32 s11, s16
-; CHECK-NEXT: vmov.f32 s13, s16
-; CHECK-NEXT: vmov.f32 s15, s16
-; CHECK-NEXT: vpop {d8}
+; CHECK: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov.i32 q8, #0x0
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vmov d4, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d16[1]
+; CHECK-NEXT: vmov d8, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d16[2]
+; CHECK-NEXT: vmov d5, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d16[3]
+; CHECK-NEXT: vmov d9, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[0]
+; CHECK-NEXT: vmov d6, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[1]
+; CHECK-NEXT: vmov d10, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[2]
+; CHECK-NEXT: vmov d7, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[3]
+; CHECK-NEXT: vmov d11, r0, r0
+; CHECK: vmov.f32 s0, s8
+; CHECK: vmov.f32 s1, s16
+; CHECK: vmov.f32 s2, s10
+; CHECK: vmov.f32 s3, s18
+; CHECK: vmov.f32 s4, s12
+; CHECK: vmov.f32 s5, s20
+; CHECK: vmov.f32 s6, s14
+; CHECK: vmov.f32 s7, s22
+; CHECK: vmov.f32 s9, s16
+; CHECK: vmov.f32 s11, s18
+; CHECK: vmov.f32 s13, s20
+; CHECK: vmov.f32 s15, s22
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr
+
ret { <8 x half>, <8 x half> } zeroinitializer
}
define fastcc { <8 x half>, <8 x half> } @f3() {
; CHECK-LABEL: _f3
-; CHECK: vpush {d8}
-; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
-; CHECK-NEXT: vmov.i32 d8, #0x0
-; CHECK-NEXT: vmov.i32 d0, #0x0
-; CHECK-NEXT: vmov.i32 d1, #0x0
-; CHECK-NEXT: vmov.i32 d2, #0x0
-; CHECK-NEXT: vmov.i32 d3, #0x0
-; CHECK-NEXT: vmov.i32 d4, #0x0
-; CHECK-NEXT: vmov.i32 d5, #0x0
-; CHECK-NEXT: vmov.i32 d6, #0x0
-; CHECK-NEXT: vmov.i32 d7, #0x0
-; CHECK-NEXT: vmov.f32 s1, s16
-; CHECK-NEXT: vmov.f32 s3, s16
-; CHECK-NEXT: vmov.f32 s5, s16
-; CHECK-NEXT: vmov.f32 s7, s16
-; CHECK-NEXT: vmov.f32 s9, s16
-; CHECK-NEXT: vmov.f32 s11, s16
-; CHECK-NEXT: vmov.f32 s13, s16
-; CHECK-NEXT: vmov.f32 s15, s16
-; CHECK-NEXT: vpop {d8}
+; CHECK: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov.i32 q8, #0x0
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vmov d4, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d16[1]
+; CHECK-NEXT: vmov d8, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d16[2]
+; CHECK-NEXT: vmov d5, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d16[3]
+; CHECK-NEXT: vmov d9, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[0]
+; CHECK-NEXT: vmov d6, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[1]
+; CHECK-NEXT: vmov d10, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[2]
+; CHECK-NEXT: vmov d7, r0, r0
+; CHECK-NEXT: vmov.u16 r0, d17[3]
+; CHECK-NEXT: vmov d11, r0, r0
+; CHECK: vmov.f32 s0, s8
+; CHECK: vmov.f32 s1, s16
+; CHECK: vmov.f32 s2, s10
+; CHECK: vmov.f32 s3, s18
+; CHECK: vmov.f32 s4, s12
+; CHECK: vmov.f32 s5, s20
+; CHECK: vmov.f32 s6, s14
+; CHECK: vmov.f32 s7, s22
+; CHECK: vmov.f32 s9, s16
+; CHECK: vmov.f32 s11, s18
+; CHECK: vmov.f32 s13, s20
+; CHECK: vmov.f32 s15, s22
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr
ret { <8 x half>, <8 x half> } zeroinitializer
diff --git a/llvm/test/CodeGen/ARM/fp16-args.ll b/llvm/test/CodeGen/ARM/fp16-args.ll
index 18bbcd12c768a..cd039b87d4a3f 100644
--- a/llvm/test/CodeGen/ARM/fp16-args.ll
+++ b/llvm/test/CodeGen/ARM/fp16-args.ll
@@ -46,46 +46,6 @@ entry:
}
define <4 x half> @foo_vec(<4 x half> %a) {
-; SOFT-LABEL: foo_vec:
-; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: vmov s0, r3
-; SOFT-NEXT: vmov s2, r1
-; SOFT-NEXT: vcvtb.f32.f16 s0, s0
-; SOFT-NEXT: vmov s4, r0
-; SOFT-NEXT: vcvtb.f32.f16 s2, s2
-; SOFT-NEXT: vmov s6, r2
-; SOFT-NEXT: vcvtb.f32.f16 s4, s4
-; SOFT-NEXT: vcvtb.f32.f16 s6, s6
-; SOFT-NEXT: vadd.f32 s0, s0, s0
-; SOFT-NEXT: vadd.f32 s2, s2, s2
-; SOFT-NEXT: vcvtb.f16.f32 s0, s0
-; SOFT-NEXT: vadd.f32 s4, s4, s4
-; SOFT-NEXT: vcvtb.f16.f32 s2, s2
-; SOFT-NEXT: vadd.f32 s6, s6, s6
-; SOFT-NEXT: vcvtb.f16.f32 s4, s4
-; SOFT-NEXT: vcvtb.f16.f32 s6, s6
-; SOFT-NEXT: vmov r0, s4
-; SOFT-NEXT: vmov r1, s2
-; SOFT-NEXT: vmov r2, s6
-; SOFT-NEXT: vmov r3, s0
-; SOFT-NEXT: bx lr
-;
-; HARD-LABEL: foo_vec:
-; HARD: @ %bb.0: @ %entry
-; HARD-NEXT: vcvtb.f32.f16 s4, s3
-; HARD-NEXT: vcvtb.f32.f16 s2, s2
-; HARD-NEXT: vcvtb.f32.f16 s6, s1
-; HARD-NEXT: vcvtb.f32.f16 s0, s0
-; HARD-NEXT: vadd.f32 s2, s2, s2
-; HARD-NEXT: vadd.f32 s0, s0, s0
-; HARD-NEXT: vcvtb.f16.f32 s2, s2
-; HARD-NEXT: vadd.f32 s4, s4, s4
-; HARD-NEXT: vcvtb.f16.f32 s0, s0
-; HARD-NEXT: vadd.f32 s6, s6, s6
-; HARD-NEXT: vcvtb.f16.f32 s3, s4
-; HARD-NEXT: vcvtb.f16.f32 s1, s6
-; HARD-NEXT: bx lr
-;
; FULL-SOFT-LE-LABEL: foo_vec:
; FULL-SOFT-LE: @ %bb.0: @ %entry
; FULL-SOFT-LE-NEXT: vmov d16, r0, r1
diff --git a/llvm/test/CodeGen/ARM/fp16-instructions.ll b/llvm/test/CodeGen/ARM/fp16-instructions.ll
index 8477cb7e02b23..1988cb1d2f903 100644
--- a/llvm/test/CodeGen/ARM/fp16-instructions.ll
+++ b/llvm/test/CodeGen/ARM/fp16-instructions.ll
@@ -85,8 +85,8 @@ entry:
; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
-; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
-; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
+; CHECK-HARDFP-VFP3: vmov r
+; CHECK-HARDFP-VFP3: vmov.f32 s
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vadd.f32
@@ -368,8 +368,8 @@ entry:
; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
-; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
-; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
+; CHECK-HARDFP-VFP3: vmov r
+; CHECK-HARDFP-VFP3: vmov.f32 s
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vdiv.f32
@@ -590,8 +590,8 @@ entry:
; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
-; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
-; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
+; CHECK-HARDFP-VFP3: vmov r
+; CHECK-HARDFP-VFP3: vmov.f32 s
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vmul.f32
@@ -700,18 +700,19 @@ define half @select_cc1(ptr %a0) {
; CHECK-LABEL: select_cc1:
-; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
+; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}
+; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 s0,
-; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-A32-NEXT: movne r0,
-; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
-; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-T32: it eq
-; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-T32: vcmp.f32
+; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-SOFTFP-FP16-T32-NEXT: itt eq
+; CHECK-SOFTFP-FP16-T32-NEXT: movweq r0,
+; CHECK-SOFTFP-FP16-T32-NEXT: movteq r0,
}
; FIXME: more tests need to be added for VSELGE and VSELGT.
@@ -727,18 +728,19 @@ define half @select_cc_ge1(ptr %a0) {
; CHECK-LABEL: select_cc_ge1:
-; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
+; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
+; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
-; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-A32-NEXT: movlt r0,
-; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-T32-NEXT: it ge
-; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-T32-NEXT: itt ge
+; CHECK-SOFTFP-FP16-T32-NEXT: movwge r0,
+; CHECK-SOFTFP-FP16-T32-NEXT: movtge r0,
}
define half @select_cc_ge2(ptr %a0) {
@@ -749,18 +751,19 @@ define half @select_cc_ge2(ptr %a0) {
; CHECK-LABEL: select_cc_ge2:
-; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6
+; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
+; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
-; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-A32-NEXT: vmovls.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-A32-NEXT: movhi r0,
-; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-T32-NEXT: it ls
-; CHECK-SOFTFP-FP16-T32-NEXT: vmovls.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-T32-NEXT: itt ls
+; CHECK-SOFTFP-FP16-T32-NEXT: movwls r0,
+; CHECK-SOFTFP-FP16-T32-NEXT: movtls r0,
}
define half @select_cc_ge3(ptr %a0) {
@@ -771,18 +774,19 @@ define half @select_cc_ge3(ptr %a0) {
; CHECK-LABEL: select_cc_ge3:
-; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6
+; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
+; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
-; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-A32-NEXT: movls r0,
-; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-T32-NEXT: it hi
-; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-T32-NEXT: itt hi
+; CHECK-SOFTFP-FP16-T32-NEXT: movwhi r0,
+; CHECK-SOFTFP-FP16-T32-NEXT: movthi r0,
}
define half @select_cc_ge4(ptr %a0) {
@@ -793,18 +797,19 @@ define half @select_cc_ge4(ptr %a0) {
; CHECK-LABEL: select_cc_ge4:
-; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
+; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
-; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-A32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-A32-NEXT: movge r0,
-; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-T32-NEXT: it lt
-; CHECK-SOFTFP-FP16-T32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-T32-NEXT: itt lt
+; CHECK-SOFTFP-FP16-T32-NEXT: movwlt r0,
+; CHECK-SOFTFP-FP16-T32-NEXT: movtlt r0,
}
; 37. VSELGT
@@ -816,18 +821,19 @@ define half @select_cc_gt1(ptr %a0) {
; CHECK-LABEL: select_cc_gt1:
-; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
+; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
-; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-A32-NEXT: movle r0,
-; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-T32-NEXT: it gt
-; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-T32-NEXT: itt gt
+; CHECK-SOFTFP-FP16-T32-NEXT: movwgt r0,
+; CHECK-SOFTFP-FP16-T32-NEXT: movtgt r0,
}
define half @select_cc_gt2(ptr %a0) {
@@ -838,18 +844,19 @@ define half @select_cc_gt2(ptr %a0) {
; CHECK-LABEL: select_cc_gt2:
-; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6
+; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
-; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-A32-NEXT: movmi r0,
-; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-T32-NEXT: it pl
-; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-T32-NEXT: itt pl
+; CHECK-SOFTFP-FP16-T32-NEXT: movwpl r0,
+; CHECK-SOFTFP-FP16-T32-NEXT: movtpl r0,
}
define half @select_cc_gt3(ptr %a0) {
@@ -860,18 +867,19 @@ define half @select_cc_gt3(ptr %a0) {
; CHECK-LABEL: select_cc_gt3:
-; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
+; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
-; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-A32-NEXT: vmovle.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-A32-NEXT: movgt r0,
-; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-T32-NEXT: it le
-; CHECK-SOFTFP-FP16-T32-NEXT: vmovle.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-T32-NEXT: itt le
+; CHECK-SOFTFP-FP16-T32-NEXT: movwle r0,
+; CHECK-SOFTFP-FP16-T32-NEXT: movtle r0,
}
define half @select_cc_gt4(ptr %a0) {
@@ -882,18 +890,19 @@ define half @select_cc_gt4(ptr %a0) {
; CHECK-LABEL: select_cc_gt4:
-; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6
+; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
-; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-A32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-A32-NEXT: movpl r0,
-; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
+; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-T32-NEXT: it mi
-; CHECK-SOFTFP-FP16-T32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
+; CHECK-SOFTFP-FP16-T32-NEXT: itt mi
+; CHECK-SOFTFP-FP16-T32-NEXT: movwmi r0,
+; CHECK-SOFTFP-FP16-T32-NEXT: movtmi r0,
}
; 38. VSELVS
@@ -924,26 +933,25 @@ entry:
; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]]
-; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
+; CHECK-SOFTFP-FP16-A32: ldr r1, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0
-; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]]
-; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]]
-; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]]
+; CHECK-SOFTFP-FP16-A32: mov r0, r1
+; CHECK-SOFTFP-FP16-A32-NEXT: movne r0, #2
+; CHECK-SOFTFP-FP16-A32-NEXT: movvs r0, r1
; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
-; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
-; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
-; CHECK-SOFTFP-FP16-T32: it eq
-; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]]
-; CHECK-SOFTFP-FP16-T32: it vs
-; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]]
-; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]]
+; CHECK-SOFTFP-FP16-T32: itt eq
+; CHECK-SOFTFP-FP16-T32-NEXT: movweq r1,
+; CHECK-SOFTFP-FP16-T32-NEXT: movteq r1,
+; CHECK-SOFTFP-FP16-T32-NEXT: itt vs
+; CHECK-SOFTFP-FP16-T32-NEXT: movwvs r1,
+; CHECK-SOFTFP-FP16-T32-NEXT: movtvs r1,
+; CHECK-SOFTFP-FP16-T32-NEXT: uxth r0, r1
}
; 40. VSUB
@@ -986,8 +994,8 @@ entry:
; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
-; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
-; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
+; CHECK-HARDFP-VFP3: vmov r
+; CHECK-HARDFP-VFP3: vmov.f32 s
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vsub.f32
diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll
index 69820850893ef..9c01129ff30d8 100644
--- a/llvm/test/CodeGen/ARM/fp16-promote.ll
+++ b/llvm/test/CodeGen/ARM/fp16-promote.ll
@@ -660,10 +660,8 @@ define void @test_maxnum(ptr %p, ptr %q) #0 {
; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
; CHECK-VFP: vcmp.f32
; CHECK-VFP: vmrs
-; CHECK-VFP: vmovlt.f32
+; CHECK-VFP: movge
; CHECK-NOVFP: bl __aeabi_fcmpge
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_minimum(ptr %p) #0 {
%a = load half, ptr %p, align 2
%c = fcmp ult half %a, 1.0
@@ -680,10 +678,8 @@ define void @test_minimum(ptr %p) #0 {
; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
; CHECK-VFP: vcmp.f32
; CHECK-VFP: vmrs
-; CHECK-VFP: vmovhi.f32
+; CHECK-VFP: movls
; CHECK-NOVFP: bl __aeabi_fcmple
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_maximum(ptr %p) #0 {
%a = load half, ptr %p, align 2
%c = fcmp ugt half %a, 1.0
@@ -692,45 +688,15 @@ define void @test_maximum(ptr %p) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_copysign:
-; CHECK-FP16: ldrh r2, [r0]
-; CHECK-FP16-NEXT: vmov.i32 d16, #0x80000000
-; CHECK-FP16-NEXT: ldrh r1, [r1]
-; CHECK-FP16-NEXT: vmov s0, r2
-; CHECK-FP16-NEXT: vmov s2, r1
-; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0
-; CHECK-FP16-NEXT: vcvtb.f32.f16 s2, s2
-; CHECK-FP16-NEXT: vbit d0, d1, d16
-; CHECK-FP16-NEXT: vcvtb.f16.f32 s0, s0
-; CHECK-FP16-NEXT: vmov r1, s0
-; CHECK-FP16-NEXT: strh r1, [r0]
-; CHECK-FP16-NEXT: bx lr
-
-; CHECK-LIBCALL-LABEL: test_copysign:
-; CHECK-LIBCALL-VFP: .fnstart
-; CHECK-LIBCALL-VFP-NEXT: .save {r4, r5, r11, lr}
-; CHECK-LIBCALL-VFP-NEXT: push {r4, r5, r11, lr}
-; CHECK-LIBCALL-VFP-NEXT: .vsave {d8, d9}
-; CHECK-LIBCALL-VFP-NEXT: vpush {d8, d9}
-; CHECK-LIBCALL-VFP-NEXT: mov r5, r0
-; CHECK-LIBCALL-VFP-NEXT: ldrh r0, [r0]
-; CHECK-LIBCALL-VFP-NEXT: mov r4, r1
-; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL-VFP: ldrh r1, [r4]
-; CHECK-LIBCALL-VFP-NEXT: vmov s18, r0
-; CHECK-LIBCALL-VFP-NEXT: vmov.i32 d8, #0x80000000
-; CHECK-LIBCALL-VFP-NEXT: mov r0, r1
-; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL-VFP: vmov s0, r0
-; CHECK-LIBCALL-VFP-NEXT: vbif d0, d9, d8
-; CHECK-LIBCALL-VFP-NEXT: vmov r0, s0
-; CHECK-LIBCALL: bl __aeabi_f2h
-; CHECK-LIBCALL-VFP: strh r0, [r5]
-; CHECK-LIBCALL-VFP-NEXT: vpop {d8, d9}
-; CHECK-LIBCALL-VFP-NEXT: pop {r4, r5, r11, pc}
-; CHECK-NOVFP: and
-; CHECK-NOVFP: bic
-; CHECK-NOVFP: orr
+; CHECK-ALL-LABEL: test_copysign:
+; CHECK-ALL: ldrh r2, [r0]
+; CHECK-ALL-NEXT: ldrh r1, [r1]
+; CHECK-ALL-NEXT: and r1, r1, #32768
+; CHECK-ALL-NEXT: bfc r2, #15, #17
+; CHECK-ALL-NEXT: orr r1, r2, r1
+; CHECK-ALL-NEXT: strh r1, [r0]
+; CHECK-ALL-NEXT: bx lr
+
define void @test_copysign(ptr %p, ptr %q) #0 {
%a = load half, ptr %p, align 2
%b = load half, ptr %q, align 2
@@ -832,16 +798,23 @@ define void @test_round(ptr %p) {
; CHECK-FP16-LABEL: test_fmuladd:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vmul.f32
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vmla.f32
+; CHECK-FP16: vadd.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fmuladd:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL-VFP: vmla.f32
+; CHECK-LIBCALL-VFP: vmul.f32
; CHECK-NOVFP: bl __aeabi_fmul
; CHECK-LIBCALL: bl __aeabi_f2h
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL-VFP: vadd.f32
+; CHECK-NOVFP: bl __aeabi_fadd
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fmuladd(ptr %p, ptr %q, ptr %r) #0 {
%a = load half, ptr %p, align 2
%b = load half, ptr %q, align 2
@@ -858,41 +831,21 @@ define void @test_fmuladd(ptr %p, ptr %q, ptr %r) #0 {
; CHECK-ALL-LABEL: test_insertelement:
; CHECK-ALL: sub sp, sp, #8
-; CHECK-VFP: and
-; CHECK-VFP: mov
-; CHECK-VFP: ldrd
-; CHECK-VFP: orr
-; CHECK-VFP: ldrh
-; CHECK-VFP: stm
-; CHECK-VFP: strh
-; CHECK-VFP: ldrh
-; CHECK-VFP: ldrh
-; CHECK-VFP: ldrh
-; CHECK-VFP: ldrh
-; CHECK-VFP: strh
-; CHECK-VFP: strh
-; CHECK-VFP: strh
-; CHECK-VFP: strh
-
-; CHECK-NOVFP: ldrh
-; CHECK-NOVFP: ldrh
-; CHECK-NOVFP: ldrh
-; CHECK-NOVFP: ldrh
-; CHECK-NOVFP-DAG: strh
-; CHECK-NOVFP-DAG: strh
-; CHECK-NOVFP-DAG: mov
-; CHECK-NOVFP-DAG: ldrh
-; CHECK-NOVFP-DAG: orr
-; CHECK-NOVFP-DAG: strh
-; CHECK-NOVFP-DAG: strh
-; CHECK-NOVFP-DAG: strh
-; CHECK-NOVFP-DAG: ldrh
-; CHECK-NOVFP-DAG: ldrh
-; CHECK-NOVFP-DAG: ldrh
-; CHECK-NOVFP-DAG: strh
-; CHECK-NOVFP-DAG: strh
-; CHECK-NOVFP-DAG: strh
-; CHECK-NOVFP-DAG: strh
+; CHECK-ALL-DAG: and
+; CHECK-ALL-DAG: mov
+; CHECK-ALL-DAG: ldrd
+; CHECK-ALL-DAG: orr
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: stm
+; CHECK-ALL: ldrh
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL: strh
; CHECK-ALL: add sp, sp, #8
define void @test_insertelement(ptr %p, ptr %q, i32 %i) #0 {
@@ -904,24 +857,15 @@ define void @test_insertelement(ptr %p, ptr %q, i32 %i) #0 {
}
; CHECK-ALL-LABEL: test_extractelement:
-; CHECK-VFP: push {{{.*}}, lr}
-; CHECK-VFP: sub sp, sp, #8
-; CHECK-VFP: ldrd
-; CHECK-VFP: mov
-; CHECK-VFP: orr
-; CHECK-VFP: ldrh
-; CHECK-VFP: strh
-; CHECK-VFP: add sp, sp, #8
-; CHECK-VFP: pop {{{.*}}, pc}
-; CHECK-NOVFP: ldrh
-; CHECK-NOVFP: strh
-; CHECK-NOVFP: ldrh
-; CHECK-NOVFP: strh
-; CHECK-NOVFP: ldrh
-; CHECK-NOVFP: strh
-; CHECK-NOVFP: ldrh
-; CHECK-NOVFP: strh
-; CHECK-NOVFP: ldrh
+; CHECK-ALL: push {{{.*}}, lr}
+; CHECK-ALL: sub sp, sp, #8
+; CHECK-ALL: ldrd
+; CHECK-ALL: mov
+; CHECK-ALL: orr
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: add sp, sp, #8
+; CHECK-ALL: pop {{{.*}}, pc}
define void @test_extractelement(ptr %p, ptr %q, i32 %i) #0 {
%a = load <4 x half>, ptr %q, align 8
%b = extractelement <4 x half> %a, i32 %i
diff --git a/llvm/test/CodeGen/ARM/llvm.exp10.ll b/llvm/test/CodeGen/ARM/llvm.exp10.ll
index 9e2688c988f76..eb72fe8c1e1b7 100644
--- a/llvm/test/CodeGen/ARM/llvm.exp10.ll
+++ b/llvm/test/CodeGen/ARM/llvm.exp10.ll
@@ -36,6 +36,8 @@ define <1 x half> @exp10_v1f16(<1 x half> %x) {
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: pop {r7, pc}
%r = call <1 x half> @llvm.exp10.v1f16(<1 x half> %x)
ret <1 x half> %r
@@ -44,19 +46,26 @@ define <1 x half> @exp10_v1f16(<1 x half> %x) {
define <2 x half> @exp10_v2f16(<2 x half> %x) {
; CHECK-LABEL: exp10_v2f16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: strh.w r0, [sp, #6]
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r1, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: strh.w r0, [sp, #4]
+; CHECK-NEXT: add r0, sp, #4
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vmov.32 r0, d16[0]
+; CHECK-NEXT: vmov.32 r1, d16[1]
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r4, pc}
%r = call <2 x half> @llvm.exp10.v2f16(<2 x half> %x)
ret <2 x half> %r
}
@@ -65,24 +74,27 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
; CHECK-LABEL: exp10_v3f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: pkhbt r5, r0, r6, lsl #16
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: uxth r0, r0
+; CHECK-NEXT: vmov d16, r5, r0
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vmov.u16 r1, d16[1]
+; CHECK-NEXT: vmov.u16 r2, d16[2]
; CHECK-NEXT: pop {r4, r5, r6, pc}
%r = call <3 x half> @llvm.exp10.v3f16(<3 x half> %x)
ret <3 x half> %r
@@ -93,31 +105,34 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: mov r4, r3
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r3
; CHECK-NEXT: mov r6, r2
-; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: pkhbt r6, r0, r7, lsl #16
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r3, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: pkhbt r0, r0, r5, lsl #16
+; CHECK-NEXT: vmov d16, r0, r6
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vmov.u16 r1, d16[1]
+; CHECK-NEXT: vmov.u16 r2, d16[2]
+; CHECK-NEXT: vmov.u16 r3, d16[3]
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
%r = call <4 x half> @llvm.exp10.v4f16(<4 x half> %x)
diff --git a/llvm/test/CodeGen/ARM/llvm.frexp.ll b/llvm/test/CodeGen/ARM/llvm.frexp.ll
index 7dbaa639fa409..e79ddbe93336e 100644
--- a/llvm/test/CodeGen/ARM/llvm.frexp.ll
+++ b/llvm/test/CodeGen/ARM/llvm.frexp.ll
@@ -52,36 +52,36 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
; CHECK-LABEL: test_frexp_v2f16_v2i32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vpush {d8}
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: add r5, sp, #4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: vld1.32 {d8[0]}, [r5:32]
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: add r4, sp, #4
+; CHECK-NEXT: add r4, sp, #8
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl frexpf
-; CHECK-NEXT: mov r7, sp
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: bl frexpf
-; CHECK-NEXT: vld1.32 {d8[0]}, [r7:32]
-; CHECK-NEXT: vld1.32 {d8[1]}, [r4:32]
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: strh.w r0, [sp, #14]
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r1, r0
+; CHECK-NEXT: strh.w r0, [sp, #12]
+; CHECK-NEXT: add r0, sp, #12
+; CHECK-NEXT: vld1.32 {d8[1]}, [r4:32]
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT: vmov r2, r3, d8
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vmov.32 r0, d16[0]
+; CHECK-NEXT: vmov.32 r1, d16[1]
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop {r4, r5, r6, pc}
%result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
ret { <2 x half>, <2 x i32> } %result
}
@@ -89,23 +89,28 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) {
; CHECK-LABEL: test_frexp_v2f16_v2i32_only_use_fract:
; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: add r1, sp, #8
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: strh.w r0, [sp, #14]
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: add r1, sp, #4
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r1, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: strh.w r0, [sp, #12]
+; CHECK-NEXT: add r0, sp, #12
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vmov.32 r0, d16[0]
+; CHECK-NEXT: vmov.32 r1, d16[1]
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: pop {r4, pc}
%result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
%result.0 = extractvalue { <2 x half>, <2 x i32> } %result, 0
ret <2 x half> %result.0
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll
index e52b78ca0ea2d..0415c327d099f 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll
@@ -11,28 +11,36 @@ define half @test_v4f16_reassoc(<4 x half> %a) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: mov r4, #255
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: orr r4, r4, #65280
+; CHECK-NEXT: mov r8, #255
+; CHECK-NEXT: mov r4, r3
+; CHECK-NEXT: orr r8, r8, #65280
; CHECK-NEXT: mov r5, r2
-; CHECK-NEXT: and r0, r3, r4
+; CHECK-NEXT: and r0, r0, r8
; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __aeabi_h2f
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: and r0, r5, r4
-; CHECK-NEXT: bl __aeabi_h2f
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: and r0, r7, r4
-; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: and r0, r6, r4
+; CHECK-NEXT: and r0, r6, r8
; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: bl __aeabi_fadd
+; CHECK-NEXT: bl __aeabi_f2h
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: and r0, r5, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: and r0, r6, r8
+; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_fadd
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl __aeabi_f2h
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: and r0, r4, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: and r0, r5, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_fadd
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
@@ -46,28 +54,36 @@ define half @test_v4f16_seq(<4 x half> %a) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: mov r4, #255
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: orr r4, r4, #65280
+; CHECK-NEXT: mov r8, #255
+; CHECK-NEXT: mov r4, r3
+; CHECK-NEXT: orr r8, r8, #65280
; CHECK-NEXT: mov r5, r2
-; CHECK-NEXT: and r0, r3, r4
+; CHECK-NEXT: and r0, r0, r8
; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __aeabi_h2f
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: and r0, r5, r4
-; CHECK-NEXT: bl __aeabi_h2f
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: and r0, r7, r4
-; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: and r0, r6, r4
+; CHECK-NEXT: and r0, r6, r8
; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: bl __aeabi_fadd
+; CHECK-NEXT: bl __aeabi_f2h
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: and r0, r5, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: and r0, r6, r8
+; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_fadd
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl __aeabi_f2h
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: and r0, r4, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: and r0, r5, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_fadd
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll
index 3541df933d075..fe81324d6679b 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll
@@ -43,9 +43,6 @@ define half @test_v1f16_neutral(<1 x half> %a) nounwind {
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: bl __aeabi_f2h
-; CHECK-NEXT: mov r1, #255
-; CHECK-NEXT: orr r1, r1, #65280
-; CHECK-NEXT: and r0, r0, r1
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call half @llvm.vector.reduce.fadd.f16.v1f16(half -0.0, <1 x half> %a)
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll
index 8cfcdbd3b4467..d3518fe468607 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll
@@ -9,44 +9,41 @@ declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128>)
define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-LABEL: test_v4f16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-NEXT: mov r9, #255
-; CHECK-NEXT: mov r8, r3
-; CHECK-NEXT: orr r9, r9, #65280
-; CHECK-NEXT: mov r6, r2
-; CHECK-NEXT: and r0, r0, r9
-; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: mov r8, #255
+; CHECK-NEXT: mov r4, r3
+; CHECK-NEXT: orr r8, r8, #65280
+; CHECK-NEXT: mov r5, r2
+; CHECK-NEXT: and r0, r0, r8
+; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: and r0, r5, r9
+; CHECK-NEXT: and r0, r6, r8
; CHECK-NEXT: bl __aeabi_h2f
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_fcmpgt
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: and r0, r6, r9
-; CHECK-NEXT: bl __aeabi_h2f
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: bl fmaxf
+; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: movne r5, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: bl __aeabi_fcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: and r0, r8, r9
-; CHECK-NEXT: moveq r5, r6
+; CHECK-NEXT: and r0, r5, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: and r0, r6, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl fmaxf
+; CHECK-NEXT: bl __aeabi_f2h
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: and r0, r4, r8
; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: and r0, r5, r8
+; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_fcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: moveq r5, r4
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: bl fmaxf
; CHECK-NEXT: bl __aeabi_f2h
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
ret half %b
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll
index 70c569e4f4781..14644e00c94b0 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll
@@ -9,44 +9,41 @@ declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128>)
define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-LABEL: test_v4f16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-NEXT: mov r9, #255
-; CHECK-NEXT: mov r8, r3
-; CHECK-NEXT: orr r9, r9, #65280
-; CHECK-NEXT: mov r6, r2
-; CHECK-NEXT: and r0, r0, r9
-; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: mov r8, #255
+; CHECK-NEXT: mov r4, r3
+; CHECK-NEXT: orr r8, r8, #65280
+; CHECK-NEXT: mov r5, r2
+; CHECK-NEXT: and r0, r0, r8
+; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: and r0, r5, r9
+; CHECK-NEXT: and r0, r6, r8
; CHECK-NEXT: bl __aeabi_h2f
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_fcmplt
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: and r0, r6, r9
-; CHECK-NEXT: bl __aeabi_h2f
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: bl fminf
+; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: movne r5, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: bl __aeabi_fcmplt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: and r0, r8, r9
-; CHECK-NEXT: moveq r5, r6
+; CHECK-NEXT: and r0, r5, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: and r0, r6, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl fminf
+; CHECK-NEXT: bl __aeabi_f2h
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: and r0, r4, r8
; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: and r0, r5, r8
+; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_fcmplt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: moveq r5, r4
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: bl fminf
; CHECK-NEXT: bl __aeabi_f2h
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
ret half %b
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll
index b3334c43ef58f..1416fa9033f3b 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll
@@ -11,28 +11,36 @@ define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: mov r4, #255
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: orr r4, r4, #65280
+; CHECK-NEXT: mov r8, #255
+; CHECK-NEXT: mov r4, r3
+; CHECK-NEXT: orr r8, r8, #65280
; CHECK-NEXT: mov r5, r2
-; CHECK-NEXT: and r0, r3, r4
+; CHECK-NEXT: and r0, r0, r8
; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __aeabi_h2f
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: and r0, r5, r4
-; CHECK-NEXT: bl __aeabi_h2f
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: and r0, r7, r4
-; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: and r0, r6, r4
+; CHECK-NEXT: and r0, r6, r8
; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: bl __aeabi_fmul
+; CHECK-NEXT: bl __aeabi_f2h
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: and r0, r5, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: and r0, r6, r8
+; CHECK-NEXT: bl __aeabi_h2f
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_fmul
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl __aeabi_f2h
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: and r0, r4, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: and r0, r5, r8
+; CHECK-NEXT: bl __aeabi_h2f
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_fmul
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll
index d2476ab3f528f..bd6f234ad48ec 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll
@@ -16,9 +16,6 @@ define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: bl __aeabi_f2h
-; CHECK-NEXT: mov r1, #255
-; CHECK-NEXT: orr r1, r1, #65280
-; CHECK-NEXT: and r0, r0, r1
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call half @llvm.vector.reduce.fmul.f16.v1f16(half 1.0, <1 x half> %a)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vabd.ll b/llvm/test/CodeGen/Thumb2/mve-vabd.ll
index f209a76d82e80..8d52fe52d9360 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vabd.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vabd.ll
@@ -63,27 +63,31 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) {
; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-MVE-NEXT: mov r4, r0
-; CHECK-MVE-NEXT: vmov.u16 r0, q1[1]
+; CHECK-MVE-NEXT: vmov.u16 r0, q1[0]
; CHECK-MVE-NEXT: vmov q5, q1
; CHECK-MVE-NEXT: vmov q4, q0
; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: mov r5, r0
-; CHECK-MVE-NEXT: vmov.u16 r0, q4[1]
+; CHECK-MVE-NEXT: vmov.u16 r0, q4[0]
; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: mov r1, r5
; CHECK-MVE-NEXT: bl __aeabi_fsub
+; CHECK-MVE-NEXT: bl __aeabi_f2h
+; CHECK-MVE-NEXT: bl __aeabi_h2f
+; CHECK-MVE-NEXT: bic r0, r0, #-2147483648
+; CHECK-MVE-NEXT: bl __aeabi_f2h
; CHECK-MVE-NEXT: mov r5, r0
-; CHECK-MVE-NEXT: vmov.u16 r0, q5[0]
+; CHECK-MVE-NEXT: vmov.u16 r0, q5[1]
; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: mov r6, r0
-; CHECK-MVE-NEXT: vmov.u16 r0, q4[0]
+; CHECK-MVE-NEXT: vmov.u16 r0, q4[1]
; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: mov r1, r6
; CHECK-MVE-NEXT: bl __aeabi_fsub
-; CHECK-MVE-NEXT: bic r0, r0, #-2147483648
; CHECK-MVE-NEXT: bl __aeabi_f2h
-; CHECK-MVE-NEXT: vmov.16 q6[0], r0
-; CHECK-MVE-NEXT: bic r0, r5, #-2147483648
+; CHECK-MVE-NEXT: vmov.16 q6[0], r5
+; CHECK-MVE-NEXT: bl __aeabi_h2f
+; CHECK-MVE-NEXT: bic r0, r0, #-2147483648
; CHECK-MVE-NEXT: bl __aeabi_f2h
; CHECK-MVE-NEXT: vmov.16 q6[1], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q5[2]
@@ -93,6 +97,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) {
; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: mov r1, r5
; CHECK-MVE-NEXT: bl __aeabi_fsub
+; CHECK-MVE-NEXT: bl __aeabi_f2h
+; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: bic r0, r0, #-2147483648
; CHECK-MVE-NEXT: bl __aeabi_f2h
; CHECK-MVE-NEXT: vmov.16 q6[2], r0
@@ -103,6 +109,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) {
; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: mov r1, r5
; CHECK-MVE-NEXT: bl __aeabi_fsub
+; CHECK-MVE-NEXT: bl __aeabi_f2h
+; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: bic r0, r0, #-2147483648
; CHECK-MVE-NEXT: bl __aeabi_f2h
; CHECK-MVE-NEXT: vmov.16 q6[3], r0
@@ -113,6 +121,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) {
; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: mov r1, r5
; CHECK-MVE-NEXT: bl __aeabi_fsub
+; CHECK-MVE-NEXT: bl __aeabi_f2h
+; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: bic r0, r0, #-2147483648
; CHECK-MVE-NEXT: bl __aeabi_f2h
; CHECK-MVE-NEXT: vmov.16 q6[4], r0
@@ -123,6 +133,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) {
; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: mov r1, r5
; CHECK-MVE-NEXT: bl __aeabi_fsub
+; CHECK-MVE-NEXT: bl __aeabi_f2h
+; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: bic r0, r0, #-2147483648
; CHECK-MVE-NEXT: bl __aeabi_f2h
; CHECK-MVE-NEXT: vmov.16 q6[5], r0
@@ -133,6 +145,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) {
; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: mov r1, r5
; CHECK-MVE-NEXT: bl __aeabi_fsub
+; CHECK-MVE-NEXT: bl __aeabi_f2h
+; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: bic r0, r0, #-2147483648
; CHECK-MVE-NEXT: bl __aeabi_f2h
; CHECK-MVE-NEXT: vmov.16 q6[6], r0
@@ -143,6 +157,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) {
; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: mov r1, r5
; CHECK-MVE-NEXT: bl __aeabi_fsub
+; CHECK-MVE-NEXT: bl __aeabi_f2h
+; CHECK-MVE-NEXT: bl __aeabi_h2f
; CHECK-MVE-NEXT: bic r0, r0, #-2147483648
; CHECK-MVE-NEXT: bl __aeabi_f2h
; CHECK-MVE-NEXT: vmov.16 q6[7], r0
More information about the llvm-commits
mailing list