[llvm] 20b2d11 - [ARM] Fix Crash in 't'/'w' handling without fp16/bf16

Archibald Elliott via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 6 03:55:18 PST 2023


Author: Archibald Elliott
Date: 2023-03-06T11:55:08Z
New Revision: 20b2d11896d9351ea8347e1342eb0487ecadc70f

URL: https://github.com/llvm/llvm-project/commit/20b2d11896d9351ea8347e1342eb0487ecadc70f
DIFF: https://github.com/llvm/llvm-project/commit/20b2d11896d9351ea8347e1342eb0487ecadc70f.diff

LOG: [ARM] Fix Crash in 't'/'w' handling without fp16/bf16

After https://reviews.llvm.org/rGff4027d152d0 and
https://reviews.llvm.org/rG7d15212b8c0c we saw crashes in SelectionDAG
when the 't' and 'w' inline assembly constraints were used with 16-bit
floating point values on targets without the fp16 or bf16 extensions.

However, it is still possible to move 16-bit floating point values into
the right place in S registers with a normal `vmov`, even if we don't
have fp16 instructions we can use within the inline assembly string.
This patch therefore fixes the crash.

I think the reason we weren't getting this crash before is that the
__fp16 and __bf16 types got an error diagnostic in the Clang frontend
when you didn't have the right architectural extensions to use them.
This restriction was recently relaxed.

The approach for bf16 needs a bit more explanation. Exactly how BF16 is
legalized was changed in rGb769eb02b526e3966847351e15d283514c2ec767 -
effectively, whether you have the right instructions to get a bf16 value
into/out of an S register with MoveTo/FromHPR depends on hasFullFP16, but
whether you use an HPR for a value of type MVT::bf16 depends on hasBF16.
This is why the tests are not changed by `+bf16` vs `-bf16`, but I've
left both sets of RUN lines in case this changes in the future.

Test Changes:
- Added more tests for inline asm (the core part)
- fp16-promote.ll and pr47454.ll show improvements where unnecessary
  fp16-fp32 up/down-casts are no longer emitted. This results in fewer
  libcalls where those casts would be done with a libcall.
- aes-erratum-fix.ll is fairly noisy, and I need to revisit this test so
  that the IR is more minimal than it is right now, because most of the
  changes in this commit do not relate to what the AES erratum test is
  actually trying to verify.

Differential Revision: https://reviews.llvm.org/D143711

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/test/CodeGen/ARM/aes-erratum-fix.ll
    llvm/test/CodeGen/ARM/fp16-promote.ll
    llvm/test/CodeGen/ARM/inlineasm-fp-half.ll
    llvm/test/CodeGen/ARM/pr47454.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 552a24e9b649b..3d2223961433d 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4422,10 +4422,8 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
 bool ARMTargetLowering::splitValueIntoRegisterParts(
     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
     unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
-  bool IsABIRegCopy = CC.has_value();
   EVT ValueVT = Val.getValueType();
-  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
-      PartVT == MVT::f32) {
+  if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
     unsigned ValueBits = ValueVT.getSizeInBits();
     unsigned PartBits = PartVT.getSizeInBits();
     Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
@@ -4440,9 +4438,7 @@ bool ARMTargetLowering::splitValueIntoRegisterParts(
 SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
     MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
-  bool IsABIRegCopy = CC.has_value();
-  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
-      PartVT == MVT::f32) {
+  if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
     unsigned ValueBits = ValueVT.getSizeInBits();
     unsigned PartBits = PartVT.getSizeInBits();
     SDValue Val = Parts[0];
@@ -20241,8 +20237,12 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
     case 'w':
       if (VT == MVT::Other)
         break;
-      if (VT == MVT::f16 || VT == MVT::bf16)
-        return RCPair(0U, &ARM::HPRRegClass);
+      if (VT == MVT::f16)
+        return RCPair(0U, Subtarget->hasFullFP16() ? &ARM::HPRRegClass
+                                                   : &ARM::SPRRegClass);
+      if (VT == MVT::bf16)
+        return RCPair(0U, Subtarget->hasBF16() ? &ARM::HPRRegClass
+                                               : &ARM::SPRRegClass);
       if (VT == MVT::f32)
         return RCPair(0U, &ARM::SPRRegClass);
       if (VT.getSizeInBits() == 64)
@@ -20263,8 +20263,12 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
     case 't':
       if (VT == MVT::Other)
         break;
-      if (VT == MVT::f16 || VT == MVT::bf16)
-        return RCPair(0U, &ARM::HPRRegClass);
+      if (VT == MVT::f16)
+        return RCPair(0U, Subtarget->hasFullFP16() ? &ARM::HPRRegClass
+                                                   : &ARM::SPRRegClass);
+      if (VT == MVT::bf16)
+        return RCPair(0U, Subtarget->hasBF16() ? &ARM::HPRRegClass
+                                               : &ARM::SPRRegClass);
       if (VT == MVT::f32 || VT == MVT::i32)
         return RCPair(0U, &ARM::SPRRegClass);
       if (VT.getSizeInBits() == 64)

diff  --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
index afd9a929f75ad..d1e083b0ee883 100644
--- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
+++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
@@ -1353,335 +1353,210 @@ define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8
 define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
 ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_ptr:
 ; CHECK-FIX-NOSCHED:       @ %bb.0:
-; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FIX-NOSCHED-NEXT:    .pad #24
+; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #24
 ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
 ; CHECK-FIX-NOSCHED-NEXT:    beq .LBB36_3
 ; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
 ; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT:    vorr q9, q8, q8
-; CHECK-FIX-NOSCHED-NEXT:    vmov lr, r12, d17
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d16[1]
-; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d18[0]}, [r1:16]
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r4, d18[0]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, lr
-; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s8, s6
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s12, s6
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r12, #16
-; CHECK-FIX-NOSCHED-NEXT:    lsr lr, lr, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r12
-; CHECK-FIX-NOSCHED-NEXT:    vmov s10, lr
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s4, s4
-; CHECK-FIX-NOSCHED-NEXT:    vmov s14, r5
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s10, s10
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s14, s14
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s6
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r3
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r4, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-FIX-NOSCHED-NEXT:    vmov s7, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s7
+; CHECK-FIX-NOSCHED-NEXT:    vmov r7, r6, d17
+; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d16[0]}, [r1:16]
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r5, d16[0]
+; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r3
+; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
+; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r7
+; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #12] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r6
+; CHECK-FIX-NOSCHED-NEXT:    lsr r6, r6, #16
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #20] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r5, #16
+; CHECK-FIX-NOSCHED-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r5
 ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
 ; CHECK-FIX-NOSCHED-NEXT:    bne .LBB36_4
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB36_2:
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r1, d0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r7, d1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r1
-; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r1, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r3
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r0
-; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s3, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s11, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
+; CHECK-FIX-NOSCHED-NEXT:    vmov r4, r6, d1
+; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r3, d0
+; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r4, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r6, #16
+; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r6
+; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r3, #16
+; CHECK-FIX-NOSCHED-NEXT:    uxth r9, r4
+; CHECK-FIX-NOSCHED-NEXT:    uxth r6, r3
 ; CHECK-FIX-NOSCHED-NEXT:    b .LBB36_5
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB36_3:
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r5, [r2, #10]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r12, [r2, #6]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r6, [r2, #2]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r2, #14]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s8, r5
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #14]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #16] @ 4-byte Spill
 ; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #12]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s12, r12
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r4, [r2, #8]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r6
-; CHECK-FIX-NOSCHED-NEXT:    ldrh lr, [r2, #4]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r7
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r8, [r2]
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s6, s4
-; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s10, s8
-; CHECK-FIX-NOSCHED-NEXT:    vmov s8, r4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s14, s12
-; CHECK-FIX-NOSCHED-NEXT:    vmov s12, lr
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s4, s4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s8, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s12, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s5
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #8]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #12] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #6]
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r2, #10]
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r10, [r2]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #4]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #2]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
 ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
 ; CHECK-FIX-NOSCHED-NEXT:    beq .LBB36_2
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB36_4:
-; CHECK-FIX-NOSCHED-NEXT:    vorr q8, q0, q0
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d0[1]
-; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d16[0]}, [r1:16]
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r1, d1
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r7, d16[0]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r3
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
-; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r1
-; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r1, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r0
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s3, r0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s3
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r7
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r7, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s11, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s1
+; CHECK-FIX-NOSCHED-NEXT:    vmov r5, r3, d1
+; CHECK-FIX-NOSCHED-NEXT:    mov r4, r7
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r7, d0[1]
+; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d0[0]}, [r1:16]
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r0, d0[0]
+; CHECK-FIX-NOSCHED-NEXT:    uxth r9, r5
+; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r3
+; CHECK-FIX-NOSCHED-NEXT:    uxth r6, r7
+; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r7, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r3, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r5, #16
+; CHECK-FIX-NOSCHED-NEXT:    mov r7, r4
+; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB36_5:
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s15, r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s5
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s7
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s5
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s15, s15
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s15
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s5
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s9
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r1, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s11
+; CHECK-FIX-NOSCHED-NEXT:    uxth r8, r0
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r10, r0, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s14
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r8, lr, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s13
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s12
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s5
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r6, r12, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r0
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r1, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r1
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s10
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s8
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s3
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s2
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s4
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s1
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r9, r5, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s6
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s2
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r7, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r11, r1, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r1, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
 ; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q9
 ; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
 ; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_ptr:
 ; CHECK-CORTEX-FIX:       @ %bb.0:
-; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, lr}
-; CHECK-CORTEX-FIX-NEXT:    .vsave {d8, d9}
-; CHECK-CORTEX-FIX-NEXT:    vpush {d8, d9}
+; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-CORTEX-FIX-NEXT:    .pad #24
+; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #24
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
 ; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_3
 ; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
 ; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
 ; CHECK-CORTEX-FIX-NEXT:    vorr q9, q8, q8
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d16[1]
-; CHECK-CORTEX-FIX-NEXT:    vmov r5, r6, d17
 ; CHECK-CORTEX-FIX-NEXT:    vld1.16 {d18[0]}, [r1:16]
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r5, #16
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d18[0]
+; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d16[1]
+; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    vmov r3, r6, d17
+; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth r11, r6
 ; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s6, r5
-; CHECK-CORTEX-FIX-NEXT:    lsr r8, r3, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s8, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s12, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s5, r7
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, r8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s10, s4
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s4, s6
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 lr, d18[0]
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s6, s5
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s7, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s14, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s5, s9
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, lr, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s11, lr
-; CHECK-CORTEX-FIX-NEXT:    vmov s13, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s12, s11
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s8, s13
+; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #12] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
 ; CHECK-CORTEX-FIX-NEXT:    bne .LBB36_4
 ; CHECK-CORTEX-FIX-NEXT:  .LBB36_2:
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, r5, d1
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, r1, d0
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r1, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r3, r5, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s2, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r5
-; CHECK-CORTEX-FIX-NEXT:    vmov s3, r1
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, r0
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, r0, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s13, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s15, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s16, r7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s1, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s11, s3
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s3, s9
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s9, s15
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s13, s13
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s15, s16
+; CHECK-CORTEX-FIX-NEXT:    vmov r1, r7, d0
+; CHECK-CORTEX-FIX-NEXT:    uxth r0, r1
+; CHECK-CORTEX-FIX-NEXT:    uxth r6, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r12, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    lsr r9, r1, #16
+; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    mov r0, r3
+; CHECK-CORTEX-FIX-NEXT:    vmov r7, r3, d1
+; CHECK-CORTEX-FIX-NEXT:    uxth r10, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r5, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth lr, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r8, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    mov r3, r0
 ; CHECK-CORTEX-FIX-NEXT:    b .LBB36_5
 ; CHECK-CORTEX-FIX-NEXT:  .LBB36_3:
-; CHECK-CORTEX-FIX-NEXT:    ldrh r12, [r2]
-; CHECK-CORTEX-FIX-NEXT:    ldrh lr, [r2, #2]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r8, [r2, #4]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r5, [r2, #6]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r4, [r2, #8]
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2]
+; CHECK-CORTEX-FIX-NEXT:    ldrh r11, [r2, #12]
+; CHECK-CORTEX-FIX-NEXT:    ldrh r4, [r2, #14]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #2]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #4]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #6]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #8]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #12] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #10]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r7, [r2, #12]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r6, [r2, #14]
-; CHECK-CORTEX-FIX-NEXT:    vmov s5, r5
-; CHECK-CORTEX-FIX-NEXT:    vmov s7, r8
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s6, r7
-; CHECK-CORTEX-FIX-NEXT:    vmov s8, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s12, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, lr
-; CHECK-CORTEX-FIX-NEXT:    vmov s11, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s14, s4
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s10, s6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s6, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s4, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s5, s5
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s7, s7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s8, s9
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s12, s11
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
 ; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_2
 ; CHECK-CORTEX-FIX-NEXT:  .LBB36_4:
 ; CHECK-CORTEX-FIX-NEXT:    vorr q8, q0, q0
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d0[1]
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, r5, d1
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r5, d0[1]
 ; CHECK-CORTEX-FIX-NEXT:    vld1.16 {d16[0]}, [r1:16]
-; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r1, r5, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s2, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r5
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r3, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s3, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, r1
-; CHECK-CORTEX-FIX-NEXT:    vmov s15, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s16, r7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s1, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r0, d16[0]
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s13, s9
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s9, s15
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s11, s3
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s15, s16
-; CHECK-CORTEX-FIX-NEXT:    vmov s18, r0
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, r0, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s3, s18
+; CHECK-CORTEX-FIX-NEXT:    uxth r6, r5
+; CHECK-CORTEX-FIX-NEXT:    lsr r12, r5, #16
+; CHECK-CORTEX-FIX-NEXT:    vmov r5, r7, d1
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r1, d16[0]
+; CHECK-CORTEX-FIX-NEXT:    uxth r10, r5
+; CHECK-CORTEX-FIX-NEXT:    lsr r5, r5, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth lr, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r8, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth r0, r1
+; CHECK-CORTEX-FIX-NEXT:    lsr r9, r1, #16
+; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:  .LBB36_5:
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s10, s10
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s14, s14
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s7, s7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s6, s6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s4, s4
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s8, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s12, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s2, s2
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s9, s9
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s3, s3
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, s10
-; CHECK-CORTEX-FIX-NEXT:    vmov r1, s14
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s10, s5
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s14, s1
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s1, s13
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s5, s11
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s11, s15
-; CHECK-CORTEX-FIX-NEXT:    vmov r5, s6
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s0, s0
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r12, r0, r1, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r1, s7
-; CHECK-CORTEX-FIX-NEXT:    vmov r3, s10
-; CHECK-CORTEX-FIX-NEXT:    vmov r7, s1
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, s11
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, s9
-; CHECK-CORTEX-FIX-NEXT:    pkhbt lr, r1, r3, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r3, s14
-; CHECK-CORTEX-FIX-NEXT:    vmov r1, s0
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r7, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r7, s5
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r7, r6, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, s4
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r5, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r5, s12
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r5, r4, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s2
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r5
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r6
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], lr
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r12
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, r4, r0, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s3
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r1, r4, r1, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r1
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r0
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r7
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r3
+; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r11, r11, r4, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r12, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r10, r5, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r0, r1, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, lr, r8, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r1, r1, r3, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r3, r4, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r4
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r1
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r7
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r11
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r9, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r3
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r5
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r6
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r0
 ; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8
 ; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9
 ; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
-; CHECK-CORTEX-FIX-NEXT:    vpop {d8, d9}
-; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #24
+; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
   br i1 %0, label %5, label %12
 
 5:
@@ -1727,341 +1602,213 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
 define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
 ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_val:
 ; CHECK-FIX-NOSCHED:       @ %bb.0:
-; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r11, lr}
-; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r11, lr}
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s0
+; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FIX-NOSCHED-NEXT:    .pad #24
+; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT:    vmov r12, s0
 ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
-; CHECK-FIX-NOSCHED-NEXT:    beq .LBB37_3
+; CHECK-FIX-NOSCHED-NEXT:    beq .LBB37_2
 ; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s9
 ; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s0
-; CHECK-FIX-NOSCHED-NEXT:    vmov lr, r12, d17
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d16[1]
-; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r2
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, lr
-; CHECK-FIX-NOSCHED-NEXT:    lsr lr, lr, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s8, s2
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r3
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r2, d16[0]
-; CHECK-FIX-NOSCHED-NEXT:    lsr r4, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s12, s2
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r12, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r12
-; CHECK-FIX-NOSCHED-NEXT:    vmov s10, lr
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    vmov s14, r4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s10, s10
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s14, s14
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r2
-; CHECK-FIX-NOSCHED-NEXT:    lsr r2, r2, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s3, r2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s2
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
-; CHECK-FIX-NOSCHED-NEXT:    bne .LBB37_4
-; CHECK-FIX-NOSCHED-NEXT:  .LBB37_2:
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r2, d2
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r7, d3
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r2
-; CHECK-FIX-NOSCHED-NEXT:    lsr r2, r2, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r3
+; CHECK-FIX-NOSCHED-NEXT:    vmov r7, r6, d17
+; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r12
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r5, d16[0]
+; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r7
+; CHECK-FIX-NOSCHED-NEXT:    uxth r2, r3
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r0
+; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #12] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r6
+; CHECK-FIX-NOSCHED-NEXT:    lsr r6, r6, #16
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s7, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s11, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s4, s4
-; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s5
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
-; CHECK-FIX-NOSCHED-NEXT:    b .LBB37_5
+; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r5, #16
+; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #20] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    uxth r3, r5
+; CHECK-FIX-NOSCHED-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    b .LBB37_3
+; CHECK-FIX-NOSCHED-NEXT:  .LBB37_2:
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #14]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #12]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #8]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #12] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r2, [r1, #6]
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #2]
+; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r1, #10]
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r2, [r1, #4]
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1]
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB37_3:
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #10]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r12, [r1, #6]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r5, [r1, #2]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r1, #14]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s8, r3
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r2, [r1, #12]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s12, r12
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r4, [r1, #8]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r5
-; CHECK-FIX-NOSCHED-NEXT:    ldrh lr, [r1, #4]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r7
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r6, [r1]
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s2, s0
-; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s10, s8
-; CHECK-FIX-NOSCHED-NEXT:    vmov s8, r4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s14, s12
-; CHECK-FIX-NOSCHED-NEXT:    vmov s12, lr
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s8, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s12, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s1
 ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
-; CHECK-FIX-NOSCHED-NEXT:    beq .LBB37_2
-; CHECK-FIX-NOSCHED-NEXT:  .LBB37_4:
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s9, s9
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r2, d3
-; CHECK-FIX-NOSCHED-NEXT:    vmov r7, s9
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d2[1]
-; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d2[0], r7
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r7, d2[0]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r3
-; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r0
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    lsr r2, r2, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s7, r0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s4, s4
-; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r7
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r7, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s11, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s5
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    beq .LBB37_5
+; CHECK-FIX-NOSCHED-NEXT:  @ %bb.4:
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r6, d2[1]
+; CHECK-FIX-NOSCHED-NEXT:    mov r3, r2
+; CHECK-FIX-NOSCHED-NEXT:    mov r2, r7
+; CHECK-FIX-NOSCHED-NEXT:    vmov r4, r7, d3
+; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d2[0], r12
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r0, d2[0]
+; CHECK-FIX-NOSCHED-NEXT:    uxth r5, r6
+; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r6, #16
+; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r4
+; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r7
+; CHECK-FIX-NOSCHED-NEXT:    lsr r9, r7, #16
+; CHECK-FIX-NOSCHED-NEXT:    mov r7, r2
+; CHECK-FIX-NOSCHED-NEXT:    mov r2, r3
+; CHECK-FIX-NOSCHED-NEXT:    lsr r4, r4, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
+; CHECK-FIX-NOSCHED-NEXT:    b .LBB37_6
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB37_5:
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s15, r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s3
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s15, s15
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s15
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s9
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s6, s6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s4, s4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r2, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s11
+; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r6, d3
+; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r5, d2
+; CHECK-FIX-NOSCHED-NEXT:    lsr r4, r3, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r9, r6, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r5, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
+; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r6
+; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r3
+; CHECK-FIX-NOSCHED-NEXT:    uxth r5, r5
+; CHECK-FIX-NOSCHED-NEXT:  .LBB37_6:
+; CHECK-FIX-NOSCHED-NEXT:    uxth r8, r0
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r3, r0, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s14
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r8, lr, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s13
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s12
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r5, r12, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r0
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s1
+; CHECK-FIX-NOSCHED-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r2, r2, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r2
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s10
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s8
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s6, s7
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s6
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s4, s5
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r10, r4, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s4
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s2
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s0
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r7, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r11, r9, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r2, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
 ; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q9
 ; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
 ; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
-; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_val:
 ; CHECK-CORTEX-FIX:       @ %bb.0:
-; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r11, lr}
-; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r11, lr}
-; CHECK-CORTEX-FIX-NEXT:    .vsave {d8, d9}
-; CHECK-CORTEX-FIX-NEXT:    vpush {d8, d9}
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s9, s0
+; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-CORTEX-FIX-NEXT:    .pad #28
+; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #28
+; CHECK-CORTEX-FIX-NEXT:    vmov r2, s0
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT:    beq .LBB37_3
+; CHECK-CORTEX-FIX-NEXT:    beq .LBB37_2
 ; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s0, s9
 ; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
-; CHECK-CORTEX-FIX-NEXT:    vmov r2, s0
 ; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d16[1]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r2
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, r5, d17
-; CHECK-CORTEX-FIX-NEXT:    lsr lr, r3, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s8, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s11, lr
-; CHECK-CORTEX-FIX-NEXT:    lsr r6, r4, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r5, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r5
-; CHECK-CORTEX-FIX-NEXT:    vmov s2, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s12, r7
-; CHECK-CORTEX-FIX-NEXT:    vmov s1, r6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s3, s8
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r2, d16[0]
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s10, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s0, s2
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s2, s1
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s14, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s1, s11
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, r2, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s13, r2
-; CHECK-CORTEX-FIX-NEXT:    vmov s15, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s12, s13
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s8, s15
-; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT:    bne .LBB37_4
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r7, d16[0]
+; CHECK-CORTEX-FIX-NEXT:    uxth r6, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r7, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    str r6, [sp, #24] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    vmov r3, r7, d17
+; CHECK-CORTEX-FIX-NEXT:    uxth r6, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth r11, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r7, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    b .LBB37_3
 ; CHECK-CORTEX-FIX-NEXT:  .LBB37_2:
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, r5, d3
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, r2, d2
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r2, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r3, r5, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s6, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r5
-; CHECK-CORTEX-FIX-NEXT:    vmov s7, r2
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, r0
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, r0, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s13, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s15, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s16, r7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s5, s4
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s11, s7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s7, s9
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s9, s15
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s13, s13
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s15, s16
-; CHECK-CORTEX-FIX-NEXT:    b .LBB37_5
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1]
+; CHECK-CORTEX-FIX-NEXT:    ldrh r11, [r1, #12]
+; CHECK-CORTEX-FIX-NEXT:    ldrh r7, [r1, #14]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #24] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #2]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #4]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #6]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #8]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #10]
 ; CHECK-CORTEX-FIX-NEXT:  .LBB37_3:
-; CHECK-CORTEX-FIX-NEXT:    ldrh r12, [r1]
-; CHECK-CORTEX-FIX-NEXT:    ldrh lr, [r1, #2]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r7, [r1, #4]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r6, [r1, #6]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r5, [r1, #8]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r4, [r1, #10]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r2, [r1, #12]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #14]
-; CHECK-CORTEX-FIX-NEXT:    vmov s1, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s3, r7
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s2, r2
-; CHECK-CORTEX-FIX-NEXT:    vmov s8, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s12, r5
-; CHECK-CORTEX-FIX-NEXT:    vmov s11, lr
-; CHECK-CORTEX-FIX-NEXT:    vmov s13, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s14, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s10, s2
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s2, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s0, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s1, s1
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s3, s3
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s8, s11
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s12, s13
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #12] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT:    beq .LBB37_2
-; CHECK-CORTEX-FIX-NEXT:  .LBB37_4:
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s9, s9
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r2, d2[1]
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, s9
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r2, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s16, r7
-; CHECK-CORTEX-FIX-NEXT:    vmov.16 d2[0], r0
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, r5, d3
-; CHECK-CORTEX-FIX-NEXT:    vmov s7, r2
-; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r3, r5, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s6, r6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s11, s7
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s15, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r0, d2[0]
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r5
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s13, s9
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s9, s15
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s15, s16
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s5, s4
-; CHECK-CORTEX-FIX-NEXT:    vmov s18, r0
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, r0, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s7, s18
+; CHECK-CORTEX-FIX-NEXT:    beq .LBB37_5
+; CHECK-CORTEX-FIX-NEXT:  @ %bb.4:
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d2[1]
+; CHECK-CORTEX-FIX-NEXT:    vmov.16 d2[0], r2
+; CHECK-CORTEX-FIX-NEXT:    vmov r4, r6, d3
+; CHECK-CORTEX-FIX-NEXT:    uxth r10, r4
+; CHECK-CORTEX-FIX-NEXT:    lsr r4, r4, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth lr, r6
+; CHECK-CORTEX-FIX-NEXT:    lsr r8, r6, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth r5, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r12, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r2, d2[0]
+; CHECK-CORTEX-FIX-NEXT:    uxth r0, r2
+; CHECK-CORTEX-FIX-NEXT:    lsr r9, r2, #16
+; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    b .LBB37_6
 ; CHECK-CORTEX-FIX-NEXT:  .LBB37_5:
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s10, s10
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s14, s14
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s3, s3
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s2, s2
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s0, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s8, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s12, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s4, s4
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s6, s6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s9, s9
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s7, s7
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, s10
-; CHECK-CORTEX-FIX-NEXT:    vmov r2, s14
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s10, s1
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s14, s5
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s1, s13
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s5, s11
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s11, s15
-; CHECK-CORTEX-FIX-NEXT:    vmov r5, s2
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s8
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r12, r0, r2, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r2, s3
-; CHECK-CORTEX-FIX-NEXT:    vmov r3, s10
-; CHECK-CORTEX-FIX-NEXT:    vmov r7, s1
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, s11
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, s9
-; CHECK-CORTEX-FIX-NEXT:    pkhbt lr, r2, r3, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r3, s14
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r7, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r7, s5
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r7, r6, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s0, s4
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r5, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r5, s12
-; CHECK-CORTEX-FIX-NEXT:    vmov r2, s0
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r5, r4, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s6
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r5
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r6
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], lr
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r12
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, r4, r0, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s7
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r2, r4, r2, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r2
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r0
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r7
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r3
+; CHECK-CORTEX-FIX-NEXT:    vmov r2, r3, d2
+; CHECK-CORTEX-FIX-NEXT:    uxth r0, r2
+; CHECK-CORTEX-FIX-NEXT:    lsr r9, r2, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth r5, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r12, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    mov r0, r7
+; CHECK-CORTEX-FIX-NEXT:    vmov r6, r7, d3
+; CHECK-CORTEX-FIX-NEXT:    uxth r10, r6
+; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth lr, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r8, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    mov r7, r0
+; CHECK-CORTEX-FIX-NEXT:  .LBB37_6:
+; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r11, r11, r7, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r5, r12, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r10, r4, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r0, r2, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, lr, r8, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r2, r2, r3, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r6, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r6, [sp] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r3
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r2
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r7
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r11
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r9, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r6
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r4
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r5
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r0
 ; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8
 ; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9
 ; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
-; CHECK-CORTEX-FIX-NEXT:    vpop {d8, d9}
-; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #28
+; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
   br i1 %0, label %5, label %11
 
 5:
@@ -2159,10 +1906,7 @@ define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8>
 ; CHECK-FIX-NEXT:    cmp r0, #0
 ; CHECK-FIX-NEXT:    bxeq lr
 ; CHECK-FIX-NEXT:  .LBB39_1:
-; CHECK-FIX-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-FIX-NEXT:    vcvtb.f16.f32 s0, s0
 ; CHECK-FIX-NEXT:    vmov r2, s0
-; CHECK-FIX-NEXT:    uxth r2, r2
 ; CHECK-FIX-NEXT:    vmov.16 d2[0], r2
 ; CHECK-FIX-NEXT:  .LBB39_2: @ =>This Inner Loop Header: Depth=1
 ; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
@@ -3821,335 +3565,210 @@ define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8
 define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_ptr:
 ; CHECK-FIX-NOSCHED:       @ %bb.0:
-; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FIX-NOSCHED-NEXT:    .pad #24
+; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #24
 ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
 ; CHECK-FIX-NOSCHED-NEXT:    beq .LBB82_3
 ; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
 ; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT:    vorr q9, q8, q8
-; CHECK-FIX-NOSCHED-NEXT:    vmov lr, r12, d17
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d16[1]
-; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d18[0]}, [r1:16]
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r4, d18[0]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, lr
-; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s8, s6
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s12, s6
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r12, #16
-; CHECK-FIX-NOSCHED-NEXT:    lsr lr, lr, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r12
-; CHECK-FIX-NOSCHED-NEXT:    vmov s10, lr
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s4, s4
-; CHECK-FIX-NOSCHED-NEXT:    vmov s14, r5
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s10, s10
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s14, s14
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s6
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r3
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r4, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-FIX-NOSCHED-NEXT:    vmov s7, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s7
+; CHECK-FIX-NOSCHED-NEXT:    vmov r7, r6, d17
+; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d16[0]}, [r1:16]
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r5, d16[0]
+; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r3
+; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
+; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r7
+; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #12] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r6
+; CHECK-FIX-NOSCHED-NEXT:    lsr r6, r6, #16
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #20] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r5, #16
+; CHECK-FIX-NOSCHED-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r5
 ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
 ; CHECK-FIX-NOSCHED-NEXT:    bne .LBB82_4
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB82_2:
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r1, d0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r7, d1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r1
-; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r1, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r3
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r0
-; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s3, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s11, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
+; CHECK-FIX-NOSCHED-NEXT:    vmov r4, r6, d1
+; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r3, d0
+; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r4, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r6, #16
+; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r6
+; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r3, #16
+; CHECK-FIX-NOSCHED-NEXT:    uxth r9, r4
+; CHECK-FIX-NOSCHED-NEXT:    uxth r6, r3
 ; CHECK-FIX-NOSCHED-NEXT:    b .LBB82_5
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB82_3:
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r5, [r2, #10]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r12, [r2, #6]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r6, [r2, #2]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r2, #14]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s8, r5
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #14]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #16] @ 4-byte Spill
 ; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #12]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s12, r12
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r4, [r2, #8]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r6
-; CHECK-FIX-NOSCHED-NEXT:    ldrh lr, [r2, #4]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r7
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r8, [r2]
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s6, s4
-; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s10, s8
-; CHECK-FIX-NOSCHED-NEXT:    vmov s8, r4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s14, s12
-; CHECK-FIX-NOSCHED-NEXT:    vmov s12, lr
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s4, s4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s8, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s12, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s5
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #8]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #12] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #6]
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r2, #10]
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r10, [r2]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #4]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #2]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
 ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
 ; CHECK-FIX-NOSCHED-NEXT:    beq .LBB82_2
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB82_4:
-; CHECK-FIX-NOSCHED-NEXT:    vorr q8, q0, q0
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d0[1]
-; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d16[0]}, [r1:16]
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r1, d1
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r7, d16[0]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r3
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
-; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r1
-; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r1, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r0
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s3, r0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s3
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r7
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r7, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s11, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s1
+; CHECK-FIX-NOSCHED-NEXT:    vmov r5, r3, d1
+; CHECK-FIX-NOSCHED-NEXT:    mov r4, r7
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r7, d0[1]
+; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d0[0]}, [r1:16]
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r0, d0[0]
+; CHECK-FIX-NOSCHED-NEXT:    uxth r9, r5
+; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r3
+; CHECK-FIX-NOSCHED-NEXT:    uxth r6, r7
+; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r7, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r3, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r5, #16
+; CHECK-FIX-NOSCHED-NEXT:    mov r7, r4
+; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB82_5:
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s15, r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s5
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s7
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s5
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s15, s15
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s15
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s5
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s9
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r1, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s11
+; CHECK-FIX-NOSCHED-NEXT:    uxth r8, r0
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r10, r0, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s14
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r8, lr, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s13
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s12
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s5
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r6, r12, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r0
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r1, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r1
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s10
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s8
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s3
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s2
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s4
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s1
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r9, r5, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s6
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s2
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r7, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r11, r1, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r1, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
 ; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q9
 ; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
 ; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_ptr:
 ; CHECK-CORTEX-FIX:       @ %bb.0:
-; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, lr}
-; CHECK-CORTEX-FIX-NEXT:    .vsave {d8, d9}
-; CHECK-CORTEX-FIX-NEXT:    vpush {d8, d9}
+; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-CORTEX-FIX-NEXT:    .pad #24
+; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #24
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
 ; CHECK-CORTEX-FIX-NEXT:    beq .LBB82_3
 ; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
 ; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
 ; CHECK-CORTEX-FIX-NEXT:    vorr q9, q8, q8
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d16[1]
-; CHECK-CORTEX-FIX-NEXT:    vmov r5, r6, d17
 ; CHECK-CORTEX-FIX-NEXT:    vld1.16 {d18[0]}, [r1:16]
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r5, #16
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d18[0]
+; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d16[1]
+; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    vmov r3, r6, d17
+; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth r11, r6
 ; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s6, r5
-; CHECK-CORTEX-FIX-NEXT:    lsr r8, r3, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s8, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s12, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s5, r7
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, r8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s10, s4
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s4, s6
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 lr, d18[0]
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s6, s5
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s7, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s14, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s5, s9
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, lr, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s11, lr
-; CHECK-CORTEX-FIX-NEXT:    vmov s13, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s12, s11
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s8, s13
+; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #12] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
 ; CHECK-CORTEX-FIX-NEXT:    bne .LBB82_4
 ; CHECK-CORTEX-FIX-NEXT:  .LBB82_2:
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, r5, d1
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, r1, d0
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r1, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r3, r5, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s2, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r5
-; CHECK-CORTEX-FIX-NEXT:    vmov s3, r1
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, r0
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, r0, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s13, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s15, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s16, r7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s1, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s11, s3
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s3, s9
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s9, s15
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s13, s13
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s15, s16
+; CHECK-CORTEX-FIX-NEXT:    vmov r1, r7, d0
+; CHECK-CORTEX-FIX-NEXT:    uxth r0, r1
+; CHECK-CORTEX-FIX-NEXT:    uxth r6, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r12, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    lsr r9, r1, #16
+; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    mov r0, r3
+; CHECK-CORTEX-FIX-NEXT:    vmov r7, r3, d1
+; CHECK-CORTEX-FIX-NEXT:    uxth r10, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r5, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth lr, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r8, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    mov r3, r0
 ; CHECK-CORTEX-FIX-NEXT:    b .LBB82_5
 ; CHECK-CORTEX-FIX-NEXT:  .LBB82_3:
-; CHECK-CORTEX-FIX-NEXT:    ldrh r12, [r2]
-; CHECK-CORTEX-FIX-NEXT:    ldrh lr, [r2, #2]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r8, [r2, #4]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r5, [r2, #6]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r4, [r2, #8]
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2]
+; CHECK-CORTEX-FIX-NEXT:    ldrh r11, [r2, #12]
+; CHECK-CORTEX-FIX-NEXT:    ldrh r4, [r2, #14]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #2]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #4]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #6]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #8]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #12] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #10]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r7, [r2, #12]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r6, [r2, #14]
-; CHECK-CORTEX-FIX-NEXT:    vmov s5, r5
-; CHECK-CORTEX-FIX-NEXT:    vmov s7, r8
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s6, r7
-; CHECK-CORTEX-FIX-NEXT:    vmov s8, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s12, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, lr
-; CHECK-CORTEX-FIX-NEXT:    vmov s11, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s14, s4
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s10, s6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s6, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s4, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s5, s5
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s7, s7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s8, s9
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s12, s11
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
 ; CHECK-CORTEX-FIX-NEXT:    beq .LBB82_2
 ; CHECK-CORTEX-FIX-NEXT:  .LBB82_4:
 ; CHECK-CORTEX-FIX-NEXT:    vorr q8, q0, q0
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d0[1]
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, r5, d1
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r5, d0[1]
 ; CHECK-CORTEX-FIX-NEXT:    vld1.16 {d16[0]}, [r1:16]
-; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r1, r5, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s2, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r5
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r3, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s3, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, r1
-; CHECK-CORTEX-FIX-NEXT:    vmov s15, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s16, r7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s1, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r0, d16[0]
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s13, s9
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s9, s15
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s11, s3
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s15, s16
-; CHECK-CORTEX-FIX-NEXT:    vmov s18, r0
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, r0, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s3, s18
+; CHECK-CORTEX-FIX-NEXT:    uxth r6, r5
+; CHECK-CORTEX-FIX-NEXT:    lsr r12, r5, #16
+; CHECK-CORTEX-FIX-NEXT:    vmov r5, r7, d1
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r1, d16[0]
+; CHECK-CORTEX-FIX-NEXT:    uxth r10, r5
+; CHECK-CORTEX-FIX-NEXT:    lsr r5, r5, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth lr, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r8, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth r0, r1
+; CHECK-CORTEX-FIX-NEXT:    lsr r9, r1, #16
+; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:  .LBB82_5:
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s10, s10
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s14, s14
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s7, s7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s6, s6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s4, s4
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s8, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s12, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s2, s2
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s9, s9
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s3, s3
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, s10
-; CHECK-CORTEX-FIX-NEXT:    vmov r1, s14
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s10, s5
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s14, s1
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s1, s13
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s5, s11
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s11, s15
-; CHECK-CORTEX-FIX-NEXT:    vmov r5, s6
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s0, s0
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r12, r0, r1, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r1, s7
-; CHECK-CORTEX-FIX-NEXT:    vmov r3, s10
-; CHECK-CORTEX-FIX-NEXT:    vmov r7, s1
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, s11
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, s9
-; CHECK-CORTEX-FIX-NEXT:    pkhbt lr, r1, r3, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r3, s14
-; CHECK-CORTEX-FIX-NEXT:    vmov r1, s0
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r7, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r7, s5
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r7, r6, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, s4
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r5, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r5, s12
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r5, r4, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s2
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r5
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r6
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], lr
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r12
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, r4, r0, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s3
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r1, r4, r1, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r1
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r0
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r7
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r3
+; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r11, r11, r4, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r12, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r10, r5, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r0, r1, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, lr, r8, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r1, r1, r3, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r3, r4, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r4
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r1
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r7
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r11
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r9, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r3
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r5
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r6
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r0
 ; CHECK-CORTEX-FIX-NEXT:    aesd.8 q9, q8
 ; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q9
 ; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
-; CHECK-CORTEX-FIX-NEXT:    vpop {d8, d9}
-; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #24
+; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
   br i1 %0, label %5, label %12
 
 5:
@@ -4195,341 +3814,213 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
 define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_val:
 ; CHECK-FIX-NOSCHED:       @ %bb.0:
-; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r11, lr}
-; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r11, lr}
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s0
+; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FIX-NOSCHED-NEXT:    .pad #24
+; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT:    vmov r12, s0
 ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
-; CHECK-FIX-NOSCHED-NEXT:    beq .LBB83_3
+; CHECK-FIX-NOSCHED-NEXT:    beq .LBB83_2
 ; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s9
 ; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s0
-; CHECK-FIX-NOSCHED-NEXT:    vmov lr, r12, d17
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d16[1]
-; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r2
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, lr
-; CHECK-FIX-NOSCHED-NEXT:    lsr lr, lr, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s8, s2
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r3
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r2, d16[0]
-; CHECK-FIX-NOSCHED-NEXT:    lsr r4, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s12, s2
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r12, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r12
-; CHECK-FIX-NOSCHED-NEXT:    vmov s10, lr
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    vmov s14, r4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s10, s10
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s14, s14
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r2
-; CHECK-FIX-NOSCHED-NEXT:    lsr r2, r2, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s3, r2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s2
-; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
-; CHECK-FIX-NOSCHED-NEXT:    bne .LBB83_4
-; CHECK-FIX-NOSCHED-NEXT:  .LBB83_2:
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r2, d2
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r7, d3
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r2
-; CHECK-FIX-NOSCHED-NEXT:    lsr r2, r2, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r3
+; CHECK-FIX-NOSCHED-NEXT:    vmov r7, r6, d17
+; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r12
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r5, d16[0]
+; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r7
+; CHECK-FIX-NOSCHED-NEXT:    uxth r2, r3
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r0
+; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #12] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r6
+; CHECK-FIX-NOSCHED-NEXT:    lsr r6, r6, #16
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s7, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s11, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s4, s4
-; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s5
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
-; CHECK-FIX-NOSCHED-NEXT:    b .LBB83_5
+; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r5, #16
+; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #20] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    uxth r3, r5
+; CHECK-FIX-NOSCHED-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    b .LBB83_3
+; CHECK-FIX-NOSCHED-NEXT:  .LBB83_2:
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #14]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #12]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #8]
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #12] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r2, [r1, #6]
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #2]
+; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r1, #10]
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r2, [r1, #4]
+; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1]
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB83_3:
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #10]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r12, [r1, #6]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r5, [r1, #2]
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r1, #14]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s8, r3
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r2, [r1, #12]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s12, r12
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r4, [r1, #8]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r5
-; CHECK-FIX-NOSCHED-NEXT:    ldrh lr, [r1, #4]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r7
-; CHECK-FIX-NOSCHED-NEXT:    ldrh r6, [r1]
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s2, s0
-; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s10, s8
-; CHECK-FIX-NOSCHED-NEXT:    vmov s8, r4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s14, s12
-; CHECK-FIX-NOSCHED-NEXT:    vmov s12, lr
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s8, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s12, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s1
 ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
-; CHECK-FIX-NOSCHED-NEXT:    beq .LBB83_2
-; CHECK-FIX-NOSCHED-NEXT:  .LBB83_4:
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s9, s9
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r2, d3
-; CHECK-FIX-NOSCHED-NEXT:    vmov r7, s9
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d2[1]
-; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d2[0], r7
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r7, d2[0]
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r3
-; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r0
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
-; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT:    lsr r2, r2, #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov s7, r0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s4, s4
-; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r7
-; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r7, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s11, s5
-; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s5
+; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT:    beq .LBB83_5
+; CHECK-FIX-NOSCHED-NEXT:  @ %bb.4:
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r6, d2[1]
+; CHECK-FIX-NOSCHED-NEXT:    mov r3, r2
+; CHECK-FIX-NOSCHED-NEXT:    mov r2, r7
+; CHECK-FIX-NOSCHED-NEXT:    vmov r4, r7, d3
+; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d2[0], r12
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r0, d2[0]
+; CHECK-FIX-NOSCHED-NEXT:    uxth r5, r6
+; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r6, #16
+; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r4
+; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r7
+; CHECK-FIX-NOSCHED-NEXT:    lsr r9, r7, #16
+; CHECK-FIX-NOSCHED-NEXT:    mov r7, r2
+; CHECK-FIX-NOSCHED-NEXT:    mov r2, r3
+; CHECK-FIX-NOSCHED-NEXT:    lsr r4, r4, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
+; CHECK-FIX-NOSCHED-NEXT:    b .LBB83_6
 ; CHECK-FIX-NOSCHED-NEXT:  .LBB83_5:
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s1
-; CHECK-FIX-NOSCHED-NEXT:    vmov s15, r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s3
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s15, s15
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s15
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s9
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s6, s6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s4, s4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r2, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s11
+; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r6, d3
+; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r5, d2
+; CHECK-FIX-NOSCHED-NEXT:    lsr r4, r3, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r9, r6, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r5, #16
+; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
+; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r6
+; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r3
+; CHECK-FIX-NOSCHED-NEXT:    uxth r5, r5
+; CHECK-FIX-NOSCHED-NEXT:  .LBB83_6:
+; CHECK-FIX-NOSCHED-NEXT:    uxth r8, r0
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r3, r0, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s14
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r8, lr, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s13
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s12
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r5, r12, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r0
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s1
+; CHECK-FIX-NOSCHED-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r2, r2, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r2
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s8
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s10
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s8
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s6, s7
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s6
-; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s4
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s4, s5
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r10, r4, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s4
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s0
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s2
-; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s0
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r7, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r11, r9, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r2, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
 ; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q9
 ; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
 ; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
-; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_val:
 ; CHECK-CORTEX-FIX:       @ %bb.0:
-; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r11, lr}
-; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r11, lr}
-; CHECK-CORTEX-FIX-NEXT:    .vsave {d8, d9}
-; CHECK-CORTEX-FIX-NEXT:    vpush {d8, d9}
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s9, s0
+; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-CORTEX-FIX-NEXT:    .pad #28
+; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #28
+; CHECK-CORTEX-FIX-NEXT:    vmov r2, s0
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT:    beq .LBB83_3
+; CHECK-CORTEX-FIX-NEXT:    beq .LBB83_2
 ; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s0, s9
 ; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
-; CHECK-CORTEX-FIX-NEXT:    vmov r2, s0
 ; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d16[1]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r2
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, r5, d17
-; CHECK-CORTEX-FIX-NEXT:    lsr lr, r3, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s8, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s11, lr
-; CHECK-CORTEX-FIX-NEXT:    lsr r6, r4, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r5, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r5
-; CHECK-CORTEX-FIX-NEXT:    vmov s2, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s12, r7
-; CHECK-CORTEX-FIX-NEXT:    vmov s1, r6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s3, s8
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r2, d16[0]
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s10, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s0, s2
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s2, s1
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s14, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s1, s11
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, r2, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s13, r2
-; CHECK-CORTEX-FIX-NEXT:    vmov s15, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s12, s13
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s8, s15
-; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT:    bne .LBB83_4
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r7, d16[0]
+; CHECK-CORTEX-FIX-NEXT:    uxth r6, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r7, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    str r6, [sp, #24] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    vmov r3, r7, d17
+; CHECK-CORTEX-FIX-NEXT:    uxth r6, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth r11, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r7, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    b .LBB83_3
 ; CHECK-CORTEX-FIX-NEXT:  .LBB83_2:
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, r5, d3
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, r2, d2
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r2, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r3, r5, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s6, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r5
-; CHECK-CORTEX-FIX-NEXT:    vmov s7, r2
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, r0
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, r0, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s13, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s15, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s16, r7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s5, s4
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s11, s7
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s7, s9
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s9, s15
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s13, s13
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s15, s16
-; CHECK-CORTEX-FIX-NEXT:    b .LBB83_5
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1]
+; CHECK-CORTEX-FIX-NEXT:    ldrh r11, [r1, #12]
+; CHECK-CORTEX-FIX-NEXT:    ldrh r7, [r1, #14]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #24] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #2]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #4]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #6]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #8]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #10]
 ; CHECK-CORTEX-FIX-NEXT:  .LBB83_3:
-; CHECK-CORTEX-FIX-NEXT:    ldrh r12, [r1]
-; CHECK-CORTEX-FIX-NEXT:    ldrh lr, [r1, #2]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r7, [r1, #4]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r6, [r1, #6]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r5, [r1, #8]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r4, [r1, #10]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r2, [r1, #12]
-; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #14]
-; CHECK-CORTEX-FIX-NEXT:    vmov s1, r6
-; CHECK-CORTEX-FIX-NEXT:    vmov s3, r7
-; CHECK-CORTEX-FIX-NEXT:    vmov s0, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s2, r2
-; CHECK-CORTEX-FIX-NEXT:    vmov s8, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov s12, r5
-; CHECK-CORTEX-FIX-NEXT:    vmov s11, lr
-; CHECK-CORTEX-FIX-NEXT:    vmov s13, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s14, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s10, s2
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s2, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s0, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s1, s1
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s3, s3
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s8, s11
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s12, s13
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #12] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT:    beq .LBB83_2
-; CHECK-CORTEX-FIX-NEXT:  .LBB83_4:
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s9, s9
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r2, d2[1]
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, s9
-; CHECK-CORTEX-FIX-NEXT:    lsr r7, r2, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s16, r7
-; CHECK-CORTEX-FIX-NEXT:    vmov.16 d2[0], r0
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, r5, d3
-; CHECK-CORTEX-FIX-NEXT:    vmov s7, r2
-; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT:    lsr r3, r5, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s6, r6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s11, s7
-; CHECK-CORTEX-FIX-NEXT:    vmov s9, r3
-; CHECK-CORTEX-FIX-NEXT:    vmov s15, r4
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 r0, d2[0]
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r5
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s13, s9
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s9, s15
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s15, s16
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s5, s4
-; CHECK-CORTEX-FIX-NEXT:    vmov s18, r0
-; CHECK-CORTEX-FIX-NEXT:    lsr r12, r0, #16
-; CHECK-CORTEX-FIX-NEXT:    vmov s4, r12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s7, s18
+; CHECK-CORTEX-FIX-NEXT:    beq .LBB83_5
+; CHECK-CORTEX-FIX-NEXT:  @ %bb.4:
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d2[1]
+; CHECK-CORTEX-FIX-NEXT:    vmov.16 d2[0], r2
+; CHECK-CORTEX-FIX-NEXT:    vmov r4, r6, d3
+; CHECK-CORTEX-FIX-NEXT:    uxth r10, r4
+; CHECK-CORTEX-FIX-NEXT:    lsr r4, r4, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth lr, r6
+; CHECK-CORTEX-FIX-NEXT:    lsr r8, r6, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth r5, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r12, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 r2, d2[0]
+; CHECK-CORTEX-FIX-NEXT:    uxth r0, r2
+; CHECK-CORTEX-FIX-NEXT:    lsr r9, r2, #16
+; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    b .LBB83_6
 ; CHECK-CORTEX-FIX-NEXT:  .LBB83_5:
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s10, s10
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s14, s14
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s3, s3
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s2, s2
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s0, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s8, s8
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s12, s12
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f32.f16 s4, s4
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s6, s6
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s9, s9
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s7, s7
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, s10
-; CHECK-CORTEX-FIX-NEXT:    vmov r2, s14
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s10, s1
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s14, s5
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s1, s13
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s5, s11
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s11, s15
-; CHECK-CORTEX-FIX-NEXT:    vmov r5, s2
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s8
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r12, r0, r2, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r2, s3
-; CHECK-CORTEX-FIX-NEXT:    vmov r3, s10
-; CHECK-CORTEX-FIX-NEXT:    vmov r7, s1
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, s11
-; CHECK-CORTEX-FIX-NEXT:    vmov r0, s9
-; CHECK-CORTEX-FIX-NEXT:    pkhbt lr, r2, r3, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r3, s14
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r7, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r7, s5
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r7, r6, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r6, s0
-; CHECK-CORTEX-FIX-NEXT:    vcvtb.f16.f32 s0, s4
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r5, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r5, s12
-; CHECK-CORTEX-FIX-NEXT:    vmov r2, s0
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r5, r4, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s6
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r5
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r6
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], lr
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r12
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, r4, r0, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov r4, s7
-; CHECK-CORTEX-FIX-NEXT:    pkhbt r2, r4, r2, lsl #16
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r2
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r0
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r7
-; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r3
+; CHECK-CORTEX-FIX-NEXT:    vmov r2, r3, d2
+; CHECK-CORTEX-FIX-NEXT:    uxth r0, r2
+; CHECK-CORTEX-FIX-NEXT:    lsr r9, r2, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth r5, r3
+; CHECK-CORTEX-FIX-NEXT:    lsr r12, r3, #16
+; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    mov r0, r7
+; CHECK-CORTEX-FIX-NEXT:    vmov r6, r7, d3
+; CHECK-CORTEX-FIX-NEXT:    uxth r10, r6
+; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
+; CHECK-CORTEX-FIX-NEXT:    uxth lr, r7
+; CHECK-CORTEX-FIX-NEXT:    lsr r8, r7, #16
+; CHECK-CORTEX-FIX-NEXT:    mov r7, r0
+; CHECK-CORTEX-FIX-NEXT:  .LBB83_6:
+; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r11, r11, r7, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r5, r12, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r10, r4, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r0, r2, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, lr, r8, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r2, r2, r3, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r6, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    ldr r6, [sp] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r3
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r2
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r7
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r11
+; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r9, lsl #16
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r6
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r4
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r5
+; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r0
 ; CHECK-CORTEX-FIX-NEXT:    aesd.8 q9, q8
 ; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q9
 ; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
-; CHECK-CORTEX-FIX-NEXT:    vpop {d8, d9}
-; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #28
+; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
   br i1 %0, label %5, label %11
 
 5:
@@ -4627,10 +4118,7 @@ define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8>
 ; CHECK-FIX-NEXT:    cmp r0, #0
 ; CHECK-FIX-NEXT:    bxeq lr
 ; CHECK-FIX-NEXT:  .LBB85_1:
-; CHECK-FIX-NEXT:    vcvtb.f32.f16 s0, s0
-; CHECK-FIX-NEXT:    vcvtb.f16.f32 s0, s0
 ; CHECK-FIX-NEXT:    vmov r2, s0
-; CHECK-FIX-NEXT:    uxth r2, r2
 ; CHECK-FIX-NEXT:    vmov.16 d2[0], r2
 ; CHECK-FIX-NEXT:  .LBB85_2: @ =>This Inner Loop Header: Depth=1
 ; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]

diff  --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll
index 15e7caa51083f..ab8282c5aa120 100644
--- a/llvm/test/CodeGen/ARM/fp16-promote.ll
+++ b/llvm/test/CodeGen/ARM/fp16-promote.ll
@@ -223,18 +223,12 @@ else:
 
 declare i1 @test_dummy(ptr %p) #0
 ; CHECK-ALL-LABEL: test_phi:
-; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: [[LOOP:.LBB[1-9_]+]]:
-; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: [[LOOP:.LBB[0-9_]+]]:
 ; CHECK-FP16: bl      test_dummy
 ; CHECK-FP16: bne     [[LOOP]]
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-VFP: bl __aeabi_h2f
-; CHECK-LIBCALL: [[LOOP:.LBB[1-9_]+]]:
-; CHECK-LIBCALL-VFP: bl __aeabi_h2f
+; CHECK-LIBCALL: [[LOOP:.LBB[0-9_]+]]:
 ; CHECK-LIBCALL: bl test_dummy
 ; CHECK-LIBCALL: bne     [[LOOP]]
-; CHECK-LIBCALL-VFP: bl __aeabi_f2h
 define void @test_phi(ptr %p) #0 {
 entry:
   %a = load half, ptr %p

diff  --git a/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll b/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll
index 6da247795a3f6..9840e3f63c5ac 100644
--- a/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll
+++ b/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll
@@ -1,60 +1,173 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=arm -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumb -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s
+; No FP16/BF16
+; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFTFP
+; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFTFP
+; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD
+; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD
 
+; With FP16, Without BF16
+; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP
+; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP
+; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD
+; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD
 
-define arm_aapcscc half @f_t(half %x) nounwind {
-; CHECK-LABEL: f_t:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.f16 s0, r0
-; CHECK-NEXT:    @APP
-; CHECK-NEXT:    vsqrt.f16 s0, s0
-; CHECK-NEXT:    @NO_APP
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    bx lr
+; With FP16/BF16
+; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP,BF16-SOFTFP
+; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP,BF16-SOFTFP
+; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD,BF16-HARD
+; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD,BF16-HARD
+
+; This test ensures that we can use `w` and `t` constraints to allocate
+; S-registers for 16-bit FP inputs and outputs for inline assembly, with either
+; the softfp or hard float ABIs. (With the soft ABI, no S-regs are available).
+
+define half @half_t(half %x) nounwind {
+; NO-FP16-SOFTFP-LABEL: half_t:
+; NO-FP16-SOFTFP:       @ %bb.0: @ %entry
+; NO-FP16-SOFTFP-NEXT:    vmov s0, r0
+; NO-FP16-SOFTFP-NEXT:    @APP
+; NO-FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
+; NO-FP16-SOFTFP-NEXT:    @NO_APP
+; NO-FP16-SOFTFP-NEXT:    vmov r0, s0
+; NO-FP16-SOFTFP-NEXT:    bx lr
+;
+; NO-FP16-HARD-LABEL: half_t:
+; NO-FP16-HARD:       @ %bb.0: @ %entry
+; NO-FP16-HARD-NEXT:    @APP
+; NO-FP16-HARD-NEXT:    vmov.f32 s0, s0
+; NO-FP16-HARD-NEXT:    @NO_APP
+; NO-FP16-HARD-NEXT:    bx lr
+;
+; FP16-SOFTFP-LABEL: half_t:
+; FP16-SOFTFP:       @ %bb.0: @ %entry
+; FP16-SOFTFP-NEXT:    vmov.f16 s0, r0
+; FP16-SOFTFP-NEXT:    @APP
+; FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
+; FP16-SOFTFP-NEXT:    @NO_APP
+; FP16-SOFTFP-NEXT:    vmov r0, s0
+; FP16-SOFTFP-NEXT:    bx lr
+;
+; FP16-HARD-LABEL: half_t:
+; FP16-HARD:       @ %bb.0: @ %entry
+; FP16-HARD-NEXT:    @APP
+; FP16-HARD-NEXT:    vmov.f32 s0, s0
+; FP16-HARD-NEXT:    @NO_APP
+; FP16-HARD-NEXT:    bx lr
 entry:
-  %0 = tail call half asm "vsqrt.f16 $0, $1", "=t,t"(half %x)
+  %0 = tail call half asm "vmov $0, $1", "=t,t"(half %x)
   ret half %0
 }
 
-define arm_aapcscc half @f_w(half %x) nounwind {
-; CHECK-LABEL: f_w:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.f16 s0, r0
-; CHECK-NEXT:    @APP
-; CHECK-NEXT:    vsqrt.f16 s0, s0
-; CHECK-NEXT:    @NO_APP
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    bx lr
+define half @half_w(half %x) nounwind {
+; NO-FP16-SOFTFP-LABEL: half_w:
+; NO-FP16-SOFTFP:       @ %bb.0: @ %entry
+; NO-FP16-SOFTFP-NEXT:    vmov s0, r0
+; NO-FP16-SOFTFP-NEXT:    @APP
+; NO-FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
+; NO-FP16-SOFTFP-NEXT:    @NO_APP
+; NO-FP16-SOFTFP-NEXT:    vmov r0, s0
+; NO-FP16-SOFTFP-NEXT:    bx lr
+;
+; NO-FP16-HARD-LABEL: half_w:
+; NO-FP16-HARD:       @ %bb.0: @ %entry
+; NO-FP16-HARD-NEXT:    @APP
+; NO-FP16-HARD-NEXT:    vmov.f32 s0, s0
+; NO-FP16-HARD-NEXT:    @NO_APP
+; NO-FP16-HARD-NEXT:    bx lr
+;
+; FP16-SOFTFP-LABEL: half_w:
+; FP16-SOFTFP:       @ %bb.0: @ %entry
+; FP16-SOFTFP-NEXT:    vmov.f16 s0, r0
+; FP16-SOFTFP-NEXT:    @APP
+; FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
+; FP16-SOFTFP-NEXT:    @NO_APP
+; FP16-SOFTFP-NEXT:    vmov r0, s0
+; FP16-SOFTFP-NEXT:    bx lr
+;
+; FP16-HARD-LABEL: half_w:
+; FP16-HARD:       @ %bb.0: @ %entry
+; FP16-HARD-NEXT:    @APP
+; FP16-HARD-NEXT:    vmov.f32 s0, s0
+; FP16-HARD-NEXT:    @NO_APP
+; FP16-HARD-NEXT:    bx lr
 entry:
-  %0 = tail call half asm "vsqrt.f16 $0, $1", "=w,w"(half %x)
+  %0 = tail call half asm "vmov $0, $1", "=w,w"(half %x)
   ret half %0
 }
 
-define arm_aapcscc bfloat @h_t(bfloat %x) nounwind {
-; CHECK-LABEL: h_t:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.f16 s0, r0
-; CHECK-NEXT:    @APP
-; CHECK-NEXT:    vmov.f32 s0, s0
-; CHECK-NEXT:    @NO_APP
-; CHECK-NEXT:    vmov.f16 r0, s0
-; CHECK-NEXT:    bx lr
+define bfloat @bf16_t(bfloat %x) nounwind {
+; NO-FP16-SOFTFP-LABEL: bf16_t:
+; NO-FP16-SOFTFP:       @ %bb.0: @ %entry
+; NO-FP16-SOFTFP-NEXT:    vmov s0, r0
+; NO-FP16-SOFTFP-NEXT:    @APP
+; NO-FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
+; NO-FP16-SOFTFP-NEXT:    @NO_APP
+; NO-FP16-SOFTFP-NEXT:    vmov r0, s0
+; NO-FP16-SOFTFP-NEXT:    bx lr
+;
+; NO-FP16-HARD-LABEL: bf16_t:
+; NO-FP16-HARD:       @ %bb.0: @ %entry
+; NO-FP16-HARD-NEXT:    @APP
+; NO-FP16-HARD-NEXT:    vmov.f32 s0, s0
+; NO-FP16-HARD-NEXT:    @NO_APP
+; NO-FP16-HARD-NEXT:    bx lr
+;
+; FP16-SOFTFP-LABEL: bf16_t:
+; FP16-SOFTFP:       @ %bb.0: @ %entry
+; FP16-SOFTFP-NEXT:    vmov s0, r0
+; FP16-SOFTFP-NEXT:    @APP
+; FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
+; FP16-SOFTFP-NEXT:    @NO_APP
+; FP16-SOFTFP-NEXT:    vmov r0, s0
+; FP16-SOFTFP-NEXT:    bx lr
+;
+; FP16-HARD-LABEL: bf16_t:
+; FP16-HARD:       @ %bb.0: @ %entry
+; FP16-HARD-NEXT:    @APP
+; FP16-HARD-NEXT:    vmov.f32 s0, s0
+; FP16-HARD-NEXT:    @NO_APP
+; FP16-HARD-NEXT:    bx lr
 entry:
-  %0 = tail call bfloat asm "vmov.f32 $0, $1", "=t,t"(bfloat %x)
+  %0 = tail call bfloat asm "vmov $0, $1", "=t,t"(bfloat %x)
   ret bfloat %0
 }
 
-define arm_aapcscc bfloat @h_w(bfloat %x) nounwind {
-; CHECK-LABEL: h_w:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.f16 s0, r0
-; CHECK-NEXT:    @APP
-; CHECK-NEXT:    vmov.f32 s0, s0
-; CHECK-NEXT:    @NO_APP
-; CHECK-NEXT:    vmov.f16 r0, s0
-; CHECK-NEXT:    bx lr
+define bfloat @bf16_w(bfloat %x) nounwind {
+; NO-FP16-SOFTFP-LABEL: bf16_w:
+; NO-FP16-SOFTFP:       @ %bb.0: @ %entry
+; NO-FP16-SOFTFP-NEXT:    vmov s0, r0
+; NO-FP16-SOFTFP-NEXT:    @APP
+; NO-FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
+; NO-FP16-SOFTFP-NEXT:    @NO_APP
+; NO-FP16-SOFTFP-NEXT:    vmov r0, s0
+; NO-FP16-SOFTFP-NEXT:    bx lr
+;
+; NO-FP16-HARD-LABEL: bf16_w:
+; NO-FP16-HARD:       @ %bb.0: @ %entry
+; NO-FP16-HARD-NEXT:    @APP
+; NO-FP16-HARD-NEXT:    vmov.f32 s0, s0
+; NO-FP16-HARD-NEXT:    @NO_APP
+; NO-FP16-HARD-NEXT:    bx lr
+;
+; FP16-SOFTFP-LABEL: bf16_w:
+; FP16-SOFTFP:       @ %bb.0: @ %entry
+; FP16-SOFTFP-NEXT:    vmov s0, r0
+; FP16-SOFTFP-NEXT:    @APP
+; FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
+; FP16-SOFTFP-NEXT:    @NO_APP
+; FP16-SOFTFP-NEXT:    vmov r0, s0
+; FP16-SOFTFP-NEXT:    bx lr
+;
+; FP16-HARD-LABEL: bf16_w:
+; FP16-HARD:       @ %bb.0: @ %entry
+; FP16-HARD-NEXT:    @APP
+; FP16-HARD-NEXT:    vmov.f32 s0, s0
+; FP16-HARD-NEXT:    @NO_APP
+; FP16-HARD-NEXT:    bx lr
 entry:
-  %0 = tail call bfloat asm "vmov.f32 $0, $1", "=w,w"(bfloat %x)
+  %0 = tail call bfloat asm "vmov $0, $1", "=w,w"(bfloat %x)
   ret bfloat %0
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; BF16-HARD: {{.*}}
+; BF16-SOFTFP: {{.*}}

diff  --git a/llvm/test/CodeGen/ARM/pr47454.ll b/llvm/test/CodeGen/ARM/pr47454.ll
index 6624c94d2e9c5..95f0ac75bd4d2 100644
--- a/llvm/test/CodeGen/ARM/pr47454.ll
+++ b/llvm/test/CodeGen/ARM/pr47454.ll
@@ -10,29 +10,15 @@ define internal fastcc void @main() {
 ; CHECK:       @ %bb.0: @ %Entry
 ; CHECK-NEXT:    push {r11, lr}
 ; CHECK-NEXT:    mov r11, sp
-; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    sub sp, sp, #8
 ; CHECK-NEXT:    mov r0, #31744
 ; CHECK-NEXT:    strh r0, [r11, #-2]
 ; CHECK-NEXT:    ldrh r0, [r11, #-2]
-; CHECK-NEXT:    bl __gnu_h2f_ieee
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vstr s0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    vstr s0, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    bl getConstant
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    bl __gnu_h2f_ieee
-; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    bl __gnu_f2h_ieee
-; CHECK-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    bl __gnu_f2h_ieee
-; CHECK-NEXT:    mov r1, r0
-; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    uxth r1, r1
-; CHECK-NEXT:    vmov s0, r1
-; CHECK-NEXT:    uxth r0, r0
-; CHECK-NEXT:    vmov s1, r0
+; CHECK-NEXT:    vmov.f32 s1, s0
+; CHECK-NEXT:    vldr s0, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    bl isEqual
 ; CHECK-NEXT:    mov sp, r11
 ; CHECK-NEXT:    pop {r11, pc}


        


More information about the llvm-commits mailing list