[llvm-branch-commits] [llvm] 9390b85 - [ARM] Use half directly for args/return types in test. NFC

David Green via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jan 25 09:55:14 PST 2021


Author: David Green
Date: 2021-01-25T17:50:19Z
New Revision: 9390b85ac6aeebba3ced32009380c2311545776e

URL: https://github.com/llvm/llvm-project/commit/9390b85ac6aeebba3ced32009380c2311545776e
DIFF: https://github.com/llvm/llvm-project/commit/9390b85ac6aeebba3ced32009380c2311545776e.diff

LOG: [ARM] Use half directly for args/return types in test. NFC

Until fairly recently the calling convention for IR half was not handled
correctly in the ARM backend, meaning we needed to pass pointers that
were loaded/stored. Now that that is fixed we can switch to using the
type directly instead.

Added: 
    

Modified: 
    llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
    llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
    llvm/test/CodeGen/Thumb2/mve-shuffle.ll
    llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
    llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
    llvm/test/CodeGen/Thumb2/mve-vdup.ll
    llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll
    llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
    llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
index fa58c72a0efb..cfcbfaf54ab6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
@@ -1,25 +1,24 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
 
-define arm_aapcs_vfpcc void @test_fadd(half* noalias nocapture readonly %A, half *%BB, half* noalias nocapture %C, i32 %n) {
+define arm_aapcs_vfpcc void @test_fadd(half* noalias nocapture readonly %A, half %B, half* noalias nocapture %C, i32 %n) {
 ; CHECK-LABEL: test_fadd:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB0_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r1, [r1]
+; CHECK-NEXT:    vmov.f16 r3, s0
 ; CHECK-NEXT:  .LBB0_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
-; CHECK-NEXT:    subs r3, #8
-; CHECK-NEXT:    vadd.f16 q0, q0, r1
-; CHECK-NEXT:    vstrb.8 q0, [r2], #16
+; CHECK-NEXT:    subs r2, #8
+; CHECK-NEXT:    vadd.f16 q0, q0, r3
+; CHECK-NEXT:    vstrb.8 q0, [r1], #16
 ; CHECK-NEXT:    bne .LBB0_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %B = load half, half* %BB
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -48,25 +47,24 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-define arm_aapcs_vfpcc void @test_fadd_r(half* noalias nocapture readonly %A, half *%BB, half* noalias nocapture %C, i32 %n) {
+define arm_aapcs_vfpcc void @test_fadd_r(half* noalias nocapture readonly %A, half %B, half* noalias nocapture %C, i32 %n) {
 ; CHECK-LABEL: test_fadd_r:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB1_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r1, [r1]
+; CHECK-NEXT:    vmov.f16 r3, s0
 ; CHECK-NEXT:  .LBB1_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
-; CHECK-NEXT:    subs r3, #8
-; CHECK-NEXT:    vadd.f16 q0, q0, r1
-; CHECK-NEXT:    vstrb.8 q0, [r2], #16
+; CHECK-NEXT:    subs r2, #8
+; CHECK-NEXT:    vadd.f16 q0, q0, r3
+; CHECK-NEXT:    vstrb.8 q0, [r1], #16
 ; CHECK-NEXT:    bne .LBB1_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %B = load half, half* %BB
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -95,25 +93,24 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-define arm_aapcs_vfpcc void @test_fmul(half* noalias nocapture readonly %A, half *%BB, half* noalias nocapture %C, i32 %n) {
+define arm_aapcs_vfpcc void @test_fmul(half* noalias nocapture readonly %A, half %B, half* noalias nocapture %C, i32 %n) {
 ; CHECK-LABEL: test_fmul:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB2_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r1, [r1]
+; CHECK-NEXT:    vmov.f16 r3, s0
 ; CHECK-NEXT:  .LBB2_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
-; CHECK-NEXT:    subs r3, #8
-; CHECK-NEXT:    vmul.f16 q0, q0, r1
-; CHECK-NEXT:    vstrb.8 q0, [r2], #16
+; CHECK-NEXT:    subs r2, #8
+; CHECK-NEXT:    vmul.f16 q0, q0, r3
+; CHECK-NEXT:    vstrb.8 q0, [r1], #16
 ; CHECK-NEXT:    bne .LBB2_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %B = load half, half* %BB
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -142,25 +139,24 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-define arm_aapcs_vfpcc void @test_fmul_r(half* noalias nocapture readonly %A, half *%BB, half* noalias nocapture %C, i32 %n) {
+define arm_aapcs_vfpcc void @test_fmul_r(half* noalias nocapture readonly %A, half %B, half* noalias nocapture %C, i32 %n) {
 ; CHECK-LABEL: test_fmul_r:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB3_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r1, [r1]
+; CHECK-NEXT:    vmov.f16 r3, s0
 ; CHECK-NEXT:  .LBB3_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
-; CHECK-NEXT:    subs r3, #8
-; CHECK-NEXT:    vmul.f16 q0, q0, r1
-; CHECK-NEXT:    vstrb.8 q0, [r2], #16
+; CHECK-NEXT:    subs r2, #8
+; CHECK-NEXT:    vmul.f16 q0, q0, r3
+; CHECK-NEXT:    vstrb.8 q0, [r1], #16
 ; CHECK-NEXT:    bne .LBB3_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %B = load half, half* %BB
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -189,25 +185,24 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-define arm_aapcs_vfpcc void @test_fsub(half* noalias nocapture readonly %A, half *%BB, half* noalias nocapture %C, i32 %n) {
+define arm_aapcs_vfpcc void @test_fsub(half* noalias nocapture readonly %A, half %B, half* noalias nocapture %C, i32 %n) {
 ; CHECK-LABEL: test_fsub:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB4_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r1, [r1]
+; CHECK-NEXT:    vmov.f16 r3, s0
 ; CHECK-NEXT:  .LBB4_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
-; CHECK-NEXT:    subs r3, #8
-; CHECK-NEXT:    vsub.f16 q0, q0, r1
-; CHECK-NEXT:    vstrb.8 q0, [r2], #16
+; CHECK-NEXT:    subs r2, #8
+; CHECK-NEXT:    vsub.f16 q0, q0, r3
+; CHECK-NEXT:    vstrb.8 q0, [r1], #16
 ; CHECK-NEXT:    bne .LBB4_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %B = load half, half* %BB
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -236,26 +231,25 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-define arm_aapcs_vfpcc void @test_fsub_r(half* noalias nocapture readonly %A, half *%BB, half* noalias nocapture %C, i32 %n) {
+define arm_aapcs_vfpcc void @test_fsub_r(half* noalias nocapture readonly %A, half %B, half* noalias nocapture %C, i32 %n) {
 ; CHECK-LABEL: test_fsub_r:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB5_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r1, [r1]
-; CHECK-NEXT:    vdup.16 q0, r1
+; CHECK-NEXT:    vmov.f16 r3, s0
+; CHECK-NEXT:    vdup.16 q0, r3
 ; CHECK-NEXT:  .LBB5_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q1, [r0], #16
-; CHECK-NEXT:    subs r3, #8
+; CHECK-NEXT:    subs r2, #8
 ; CHECK-NEXT:    vsub.f16 q1, q0, q1
-; CHECK-NEXT:    vstrb.8 q1, [r2], #16
+; CHECK-NEXT:    vstrb.8 q1, [r1], #16
 ; CHECK-NEXT:    bne .LBB5_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %B = load half, half* %BB
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -285,27 +279,25 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
 }
 
 
-define arm_aapcs_vfpcc void @test_fmas(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half *%CC, half* noalias nocapture %D, i32 %n) {
+define arm_aapcs_vfpcc void @test_fmas(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half %C, half* noalias nocapture %D, i32 %n) {
 ; CHECK-LABEL: test_fmas:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldr.w r12, [sp]
-; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    cmp r3, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB6_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r2, [r2]
+; CHECK-NEXT:    vmov.f16 r12, s0
 ; CHECK-NEXT:  .LBB6_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
 ; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
-; CHECK-NEXT:    subs.w r12, r12, #8
-; CHECK-NEXT:    vfmas.f16 q1, q0, r2
-; CHECK-NEXT:    vstrb.8 q1, [r3], #16
+; CHECK-NEXT:    subs r3, #8
+; CHECK-NEXT:    vfmas.f16 q1, q0, r12
+; CHECK-NEXT:    vstrb.8 q1, [r2], #16
 ; CHECK-NEXT:    bne .LBB6_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %C = load half, half* %CC
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -338,27 +330,25 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-define arm_aapcs_vfpcc void @test_fmas_r(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half *%CC, half* noalias nocapture %D, i32 %n) {
+define arm_aapcs_vfpcc void @test_fmas_r(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half %C, half* noalias nocapture %D, i32 %n) {
 ; CHECK-LABEL: test_fmas_r:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldr.w r12, [sp]
-; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    cmp r3, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB7_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r2, [r2]
+; CHECK-NEXT:    vmov.f16 r12, s0
 ; CHECK-NEXT:  .LBB7_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
 ; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
-; CHECK-NEXT:    subs.w r12, r12, #8
-; CHECK-NEXT:    vfmas.f16 q1, q0, r2
-; CHECK-NEXT:    vstrb.8 q1, [r3], #16
+; CHECK-NEXT:    subs r3, #8
+; CHECK-NEXT:    vfmas.f16 q1, q0, r12
+; CHECK-NEXT:    vstrb.8 q1, [r2], #16
 ; CHECK-NEXT:    bne .LBB7_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %C = load half, half* %CC
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -391,27 +381,25 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-define arm_aapcs_vfpcc void @test_fma(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half *%CC, half* noalias nocapture %D, i32 %n) {
+define arm_aapcs_vfpcc void @test_fma(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half %C, half* noalias nocapture %D, i32 %n) {
 ; CHECK-LABEL: test_fma:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldr.w r12, [sp]
-; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    cmp r3, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB8_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r2, [r2]
+; CHECK-NEXT:    vmov.f16 r12, s0
 ; CHECK-NEXT:  .LBB8_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
 ; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
-; CHECK-NEXT:    subs.w r12, r12, #8
-; CHECK-NEXT:    vfma.f16 q1, q0, r2
-; CHECK-NEXT:    vstrb.8 q1, [r3], #16
+; CHECK-NEXT:    subs r3, #8
+; CHECK-NEXT:    vfma.f16 q1, q0, r12
+; CHECK-NEXT:    vstrb.8 q1, [r2], #16
 ; CHECK-NEXT:    bne .LBB8_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %C = load half, half* %CC
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -444,27 +432,25 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-define arm_aapcs_vfpcc void @test_fma_r(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half *%CC, half* noalias nocapture %D, i32 %n) {
+define arm_aapcs_vfpcc void @test_fma_r(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half %C, half* noalias nocapture %D, i32 %n) {
 ; CHECK-LABEL: test_fma_r:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldr.w r12, [sp]
-; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    cmp r3, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB9_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r2, [r2]
+; CHECK-NEXT:    vmov.f16 r12, s0
 ; CHECK-NEXT:  .LBB9_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
 ; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
-; CHECK-NEXT:    subs.w r12, r12, #8
-; CHECK-NEXT:    vfma.f16 q1, q0, r2
-; CHECK-NEXT:    vstrb.8 q1, [r3], #16
+; CHECK-NEXT:    subs r3, #8
+; CHECK-NEXT:    vfma.f16 q1, q0, r12
+; CHECK-NEXT:    vstrb.8 q1, [r2], #16
 ; CHECK-NEXT:    bne .LBB9_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %C = load half, half* %CC
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -498,30 +484,28 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
 }
 
 
-define arm_aapcs_vfpcc void @test_fmss(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half *%CC, half* noalias nocapture %D, i32 %n) {
+define arm_aapcs_vfpcc void @test_fmss(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half %C, half* noalias nocapture %D, i32 %n) {
 ; CHECK-LABEL: test_fmss:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldr.w r12, [sp]
-; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    cmp r3, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB10_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r2, [r2]
-; CHECK-NEXT:    vdup.16 q0, r2
+; CHECK-NEXT:    vmov.f16 r12, s0
+; CHECK-NEXT:    vdup.16 q0, r12
 ; CHECK-NEXT:    vneg.f16 q0, q0
 ; CHECK-NEXT:  .LBB10_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q1, [r0], #16
 ; CHECK-NEXT:    vldrw.u32 q2, [r1], #16
 ; CHECK-NEXT:    vmov q3, q0
-; CHECK-NEXT:    subs.w r12, r12, #8
+; CHECK-NEXT:    subs r3, #8
 ; CHECK-NEXT:    vfma.f16 q3, q2, q1
-; CHECK-NEXT:    vstrb.8 q3, [r3], #16
+; CHECK-NEXT:    vstrb.8 q3, [r2], #16
 ; CHECK-NEXT:    bne .LBB10_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %C = load half, half* %CC
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -554,29 +538,27 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-define arm_aapcs_vfpcc void @test_fmss_r(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half *%CC, half* noalias nocapture %D, i32 %n) {
+define arm_aapcs_vfpcc void @test_fmss_r(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half %C, half* noalias nocapture %D, i32 %n) {
 ; CHECK-LABEL: test_fmss_r:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldr.w r12, [sp]
-; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    cmp r3, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB11_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r2, [r2]
-; CHECK-NEXT:    vdup.16 q0, r2
+; CHECK-NEXT:    vmov.f16 r12, s0
+; CHECK-NEXT:    vdup.16 q0, r12
 ; CHECK-NEXT:  .LBB11_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q1, [r0], #16
 ; CHECK-NEXT:    vldrw.u32 q2, [r1], #16
 ; CHECK-NEXT:    vmov q3, q0
-; CHECK-NEXT:    subs.w r12, r12, #8
+; CHECK-NEXT:    subs r3, #8
 ; CHECK-NEXT:    vfms.f16 q3, q2, q1
-; CHECK-NEXT:    vstrb.8 q3, [r3], #16
+; CHECK-NEXT:    vstrb.8 q3, [r2], #16
 ; CHECK-NEXT:    bne .LBB11_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %C = load half, half* %CC
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -609,28 +591,26 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-define arm_aapcs_vfpcc void @test_fms(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half *%CC, half* noalias nocapture %D, i32 %n) {
+define arm_aapcs_vfpcc void @test_fms(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half %C, half* noalias nocapture %D, i32 %n) {
 ; CHECK-LABEL: test_fms:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldr.w r12, [sp]
-; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    cmp r3, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB12_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r2, [r2]
+; CHECK-NEXT:    vmov.f16 r12, s0
 ; CHECK-NEXT:  .LBB12_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r1], #16
 ; CHECK-NEXT:    vldrw.u32 q1, [r0], #16
-; CHECK-NEXT:    subs.w r12, r12, #8
+; CHECK-NEXT:    subs r3, #8
 ; CHECK-NEXT:    vneg.f16 q0, q0
-; CHECK-NEXT:    vfma.f16 q0, q1, r2
-; CHECK-NEXT:    vstrb.8 q0, [r3], #16
+; CHECK-NEXT:    vfma.f16 q0, q1, r12
+; CHECK-NEXT:    vstrb.8 q0, [r2], #16
 ; CHECK-NEXT:    bne .LBB12_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %C = load half, half* %CC
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -663,28 +643,26 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-define arm_aapcs_vfpcc void @test_fms_r(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half *%CC, half* noalias nocapture %D, i32 %n) {
+define arm_aapcs_vfpcc void @test_fms_r(half* noalias nocapture readonly %A, half* noalias nocapture readonly %B, half %C, half* noalias nocapture %D, i32 %n) {
 ; CHECK-LABEL: test_fms_r:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldr.w r12, [sp]
-; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    cmp r3, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    bxlt lr
 ; CHECK-NEXT:  .LBB13_1: @ %vector.ph
-; CHECK-NEXT:    ldrh r2, [r2]
+; CHECK-NEXT:    vmov.f16 r12, s0
 ; CHECK-NEXT:  .LBB13_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r1], #16
 ; CHECK-NEXT:    vldrw.u32 q1, [r0], #16
-; CHECK-NEXT:    subs.w r12, r12, #8
+; CHECK-NEXT:    subs r3, #8
 ; CHECK-NEXT:    vneg.f16 q0, q0
-; CHECK-NEXT:    vfma.f16 q0, q1, r2
-; CHECK-NEXT:    vstrb.8 q0, [r3], #16
+; CHECK-NEXT:    vfma.f16 q0, q1, r12
+; CHECK-NEXT:    vstrb.8 q0, [r2], #16
 ; CHECK-NEXT:    bne .LBB13_2
 ; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-NEXT:    bx lr
 entry:
-  %C = load half, half* %CC
   %0 = and i32 %n, 7
   %cmp = icmp eq i32 %0, 0
   tail call void @llvm.assume(i1 %cmp)
@@ -718,7 +696,7 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
 }
 
 
-define dso_local void @test_nested(half* noalias nocapture %pInT1, half* noalias nocapture readonly %pOutT1, half* noalias nocapture readonly %pPRT_in, half* noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l, half *%ina) local_unnamed_addr #0 {
+define dso_local void @test_nested(half* noalias nocapture %pInT1, half* noalias nocapture readonly %pOutT1, half* noalias nocapture readonly %pPRT_in, half* noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_nested:
 ; CHECK:       @ %bb.0: @ %for.body.us.preheader
 ; CHECK-NEXT:    .save {r4, r5, r6, lr}
@@ -752,7 +730,6 @@ define dso_local void @test_nested(half* noalias nocapture %pInT1, half* noalias
 ; CHECK-NEXT:  @ %bb.4: @ %for.end14
 ; CHECK-NEXT:    pop {r4, r5, r6, pc}
 for.body.us.preheader:
-  %in = load half, half* %ina
   %cmp = icmp sgt i32 %numRows, 0
   tail call void @llvm.assume(i1 %cmp)
   %cmp1 = icmp sgt i32 %numCols, 0

diff  --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index 3d0309d1579f..5b1a6c6d38cf 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -696,7 +696,7 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
 }
 
 
-define dso_local void @test_nested(float* noalias nocapture %pInT1, float* noalias nocapture readonly %pOutT1, float* noalias nocapture readonly %pPRT_in, float* noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l, float %in) local_unnamed_addr #0 {
+define dso_local void @test_nested(float* noalias nocapture %pInT1, float* noalias nocapture readonly %pOutT1, float* noalias nocapture readonly %pPRT_in, float* noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_nested:
 ; CHECK:       @ %bb.0: @ %for.body.us.preheader
 ; CHECK-NEXT:    .save {r4, r5, r6, lr}

diff  --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
index 40f3bdd1074c..1b03b982badf 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
@@ -512,14 +512,12 @@ entry:
   ret <4 x float> %res
 }
 
-; TODO: Calling convention needs fixing to pass half types directly to functions
-define arm_aapcs_vfpcc <8 x half> @insert_f16(half *%aa) {
+define arm_aapcs_vfpcc <8 x half> @insert_f16(half %a) {
 ; CHECK-LABEL: insert_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    @ kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %a = load half, half* %aa
   %res = insertelement <8 x half> undef, half %a, i32 0
   ret <8 x half> %res
 }

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vaddqr.ll b/llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
index 67dabc02917a..0c259ea93e3a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
@@ -86,14 +86,13 @@ entry:
   ret <4 x float> %c
 }
 
-define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: vaddqr_v8f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldrh r0, [r0]
+; CHECK-NEXT:    vmov.f16 r0, s4
 ; CHECK-NEXT:    vadd.f16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %src2 = load half, half *%src2p, align 2
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fadd <8 x half> %src, %sp
@@ -113,14 +112,13 @@ entry:
   ret <4 x float> %c
 }
 
-define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_2(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_2(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: vaddqr_v8f16_2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldrh r0, [r0]
+; CHECK-NEXT:    vmov.f16 r0, s4
 ; CHECK-NEXT:    vadd.f16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %src2 = load half, half *%src2p, align 2
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fadd <8 x half> %sp, %src
@@ -142,14 +140,13 @@ entry:
   ret <4 x float> %c
 }
 
-define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_3(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_3(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: vaddqr_v8f16_3:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldrh r0, [r0]
+; CHECK-NEXT:    vmov.f16 r0, s4
 ; CHECK-NEXT:    vadd.f16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %src2 = load half, half *%src2p, align 2
   %src2bc = bitcast half %src2 to i16
   %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
   %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -173,14 +170,13 @@ entry:
   ret <4 x float> %c
 }
 
-define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_4(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_4(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: vaddqr_v8f16_4:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldrh r0, [r0]
+; CHECK-NEXT:    vmov.f16 r0, s4
 ; CHECK-NEXT:    vadd.f16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %src2 = load half, half *%src2p, align 2
   %src2bc = bitcast half %src2 to i16
   %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
   %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
index 9b107e6ce4a2..4da2618ae944 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -798,124 +798,122 @@ entry:
 
 
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_oeq_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it eq
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_oeq_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 eq, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp oeq <8 x half> %src, %sp
@@ -923,141 +921,139 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_one_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_one_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_one_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
+; CHECK-MVE-NEXT:    movmi r1, #1
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
 ; CHECK-MVE-NEXT:    mov.w r2, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
 ; CHECK-MVE-NEXT:    movmi r2, #1
-; CHECK-MVE-NEXT:    vmov r0, s12
 ; CHECK-MVE-NEXT:    it gt
 ; CHECK-MVE-NEXT:    movgt r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
-; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
+; CHECK-MVE-NEXT:    movs r1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
+; CHECK-MVE-NEXT:    movmi r1, #1
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
-; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
+; CHECK-MVE-NEXT:    movs r1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it mi
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it mi
 ; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    it gt
 ; CHECK-MVE-NEXT:    movgt r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r1, #1
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_one_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp one <8 x half> %src, %sp
@@ -1065,124 +1061,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_ogt_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it gt
 ; CHECK-MVE-NEXT:    movgt r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it gt
 ; CHECK-MVE-NEXT:    movgt r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ogt_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ogt <8 x half> %src, %sp
@@ -1190,124 +1184,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_oge_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ge
 ; CHECK-MVE-NEXT:    movge r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it ge
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ge
 ; CHECK-MVE-NEXT:    movge r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_oge_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp oge <8 x half> %src, %sp
@@ -1315,124 +1307,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_olt_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
 ; CHECK-MVE-NEXT:    movmi r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it mi
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it mi
 ; CHECK-MVE-NEXT:    movmi r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_olt_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp olt <8 x half> %src, %sp
@@ -1440,124 +1430,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_ole_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ls
 ; CHECK-MVE-NEXT:    movls r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it ls
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ls
 ; CHECK-MVE-NEXT:    movls r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ole_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ole <8 x half> %src, %sp
@@ -1565,141 +1553,139 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_ueq_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_ueq_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_ueq_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    moveq r1, #1
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
 ; CHECK-MVE-NEXT:    mov.w r2, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r2, #1
-; CHECK-MVE-NEXT:    vmov r0, s12
 ; CHECK-MVE-NEXT:    it vs
 ; CHECK-MVE-NEXT:    movvs r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
-; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
+; CHECK-MVE-NEXT:    movs r1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    moveq r1, #1
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
-; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
+; CHECK-MVE-NEXT:    movs r1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it eq
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    it vs
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    it vs
 ; CHECK-MVE-NEXT:    movvs r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r1, #1
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ueq_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ueq <8 x half> %src, %sp
@@ -1707,124 +1693,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_une_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ne
 ; CHECK-MVE-NEXT:    movne r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it ne
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ne
 ; CHECK-MVE-NEXT:    movne r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_une_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ne, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp une <8 x half> %src, %sp
@@ -1832,124 +1816,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_ugt_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it hi
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it hi
 ; CHECK-MVE-NEXT:    movhi r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it hi
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it hi
 ; CHECK-MVE-NEXT:    movhi r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ugt <8 x half> %src, %sp
@@ -1957,124 +1939,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_uge_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it pl
 ; CHECK-MVE-NEXT:    movpl r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it pl
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it pl
 ; CHECK-MVE-NEXT:    movpl r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp uge <8 x half> %src, %sp
@@ -2082,124 +2062,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_ult_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it lt
 ; CHECK-MVE-NEXT:    movlt r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it lt
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it lt
 ; CHECK-MVE-NEXT:    movlt r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ult <8 x half> %src, %sp
@@ -2207,124 +2185,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_ule_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it le
 ; CHECK-MVE-NEXT:    movle r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it le
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it le
 ; CHECK-MVE-NEXT:    movle r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ule <8 x half> %src, %sp
@@ -2332,125 +2308,123 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_ord_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it vc
 ; CHECK-MVE-NEXT:    movvc r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it vc
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it vc
 ; CHECK-MVE-NEXT:    movvc r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ord_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ord <8 x half> %src, %sp
@@ -2458,125 +2432,123 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_uno_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it vs
 ; CHECK-MVE-NEXT:    movvs r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it vs
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it vs
 ; CHECK-MVE-NEXT:    movvs r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uno_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp uno <8 x half> %src, %sp
@@ -3383,124 +3355,122 @@ entry:
 
 
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_oeq_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_oeq_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_oeq_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it eq
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_oeq_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 eq, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp oeq <8 x half> %sp, %src
@@ -3508,141 +3478,139 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_one_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_one_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_one_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
+; CHECK-MVE-NEXT:    movmi r1, #1
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
 ; CHECK-MVE-NEXT:    mov.w r2, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
 ; CHECK-MVE-NEXT:    movmi r2, #1
-; CHECK-MVE-NEXT:    vmov r0, s12
 ; CHECK-MVE-NEXT:    it gt
 ; CHECK-MVE-NEXT:    movgt r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
-; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
+; CHECK-MVE-NEXT:    movs r1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
+; CHECK-MVE-NEXT:    movmi r1, #1
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
-; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
+; CHECK-MVE-NEXT:    movs r1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it mi
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it mi
 ; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    it gt
 ; CHECK-MVE-NEXT:    movgt r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r1, #1
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_one_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp one <8 x half> %sp, %src
@@ -3650,124 +3618,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_ogt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ogt_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ogt_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it gt
 ; CHECK-MVE-NEXT:    movgt r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it gt
 ; CHECK-MVE-NEXT:    movgt r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it gt
-; CHECK-MVE-NEXT:    movgt r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ogt_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ogt <8 x half> %sp, %src
@@ -3775,124 +3741,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_oge_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_oge_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_oge_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ge
 ; CHECK-MVE-NEXT:    movge r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it ge
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ge
 ; CHECK-MVE-NEXT:    movge r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it ge
-; CHECK-MVE-NEXT:    movge r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_oge_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp oge <8 x half> %sp, %src
@@ -3900,124 +3864,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_olt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_olt_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_olt_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it mi
 ; CHECK-MVE-NEXT:    movmi r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it mi
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it mi
 ; CHECK-MVE-NEXT:    movmi r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it mi
-; CHECK-MVE-NEXT:    movmi r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_olt_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp olt <8 x half> %sp, %src
@@ -4025,124 +3987,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_ole_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ole_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ole_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ls
 ; CHECK-MVE-NEXT:    movls r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it ls
+; CHECK-MVE-NEXT:    movls r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ls
 ; CHECK-MVE-NEXT:    movls r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it ls
-; CHECK-MVE-NEXT:    movls r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ole_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ole <8 x half> %sp, %src
@@ -4150,141 +4110,139 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_ueq_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ueq_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ueq_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    moveq r1, #1
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
 ; CHECK-MVE-NEXT:    mov.w r2, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r2, #1
-; CHECK-MVE-NEXT:    vmov r0, s12
 ; CHECK-MVE-NEXT:    it vs
 ; CHECK-MVE-NEXT:    movvs r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
-; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
+; CHECK-MVE-NEXT:    movs r1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    moveq r1, #1
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
-; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
+; CHECK-MVE-NEXT:    movs r1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it eq
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    it vs
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    it vs
 ; CHECK-MVE-NEXT:    movvs r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r1, #1
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ueq_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ueq <8 x half> %sp, %src
@@ -4292,124 +4250,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_une_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it ne
 ; CHECK-MVE-NEXT:    movne r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it ne
+; CHECK-MVE-NEXT:    movne r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it ne
 ; CHECK-MVE-NEXT:    movne r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it ne
-; CHECK-MVE-NEXT:    movne r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_une_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ne, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp une <8 x half> %sp, %src
@@ -4417,124 +4373,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_ugt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ugt_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ugt_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it hi
 ; CHECK-MVE-NEXT:    movhi r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it hi
+; CHECK-MVE-NEXT:    movhi r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it hi
 ; CHECK-MVE-NEXT:    movhi r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it hi
-; CHECK-MVE-NEXT:    movhi r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ugt <8 x half> %sp, %src
@@ -4542,124 +4496,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_uge_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_uge_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_uge_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it pl
 ; CHECK-MVE-NEXT:    movpl r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it pl
+; CHECK-MVE-NEXT:    movpl r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it pl
 ; CHECK-MVE-NEXT:    movpl r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it pl
-; CHECK-MVE-NEXT:    movpl r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_uge_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp uge <8 x half> %sp, %src
@@ -4667,124 +4619,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_ult_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ult_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ult_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it lt
 ; CHECK-MVE-NEXT:    movlt r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it lt
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movlt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it lt
 ; CHECK-MVE-NEXT:    movlt r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it lt
-; CHECK-MVE-NEXT:    movlt r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ult_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ult <8 x half> %sp, %src
@@ -4792,124 +4742,122 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_ule_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ule_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ule_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it le
 ; CHECK-MVE-NEXT:    movle r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it le
+; CHECK-MVE-NEXT:    movle r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it le
 ; CHECK-MVE-NEXT:    movle r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it le
-; CHECK-MVE-NEXT:    movle r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ule_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ule <8 x half> %sp, %src
@@ -4917,125 +4865,123 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_ord_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ord_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ord_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it vc
 ; CHECK-MVE-NEXT:    movvc r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it vc
+; CHECK-MVE-NEXT:    movvc r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it vc
 ; CHECK-MVE-NEXT:    movvc r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it vc
-; CHECK-MVE-NEXT:    movvc r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ord_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp ord <8 x half> %sp, %src
@@ -5043,125 +4989,123 @@ entry:
   ret <8 x half> %s
 }
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_r_uno_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_uno_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_uno_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s12
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it vs
 ; CHECK-MVE-NEXT:    movvs r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s1
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s2
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s18
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s3
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it vs
+; CHECK-MVE-NEXT:    movvs r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s16, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it vs
 ; CHECK-MVE-NEXT:    movvs r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it vs
-; CHECK-MVE-NEXT:    movvs r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_uno_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %i = insertelement <8 x half> undef, half %src2, i32 0
   %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
   %c = fcmp uno <8 x half> %sp, %src
@@ -5171,124 +5115,122 @@ entry:
 
 
 
-define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16_bc(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16_bc(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_oeq_v8f16_bc:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    .vsave {d8, d9}
+; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmov r2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s13
+; CHECK-MVE-NEXT:    vmov.16 q4[0], r2
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
+; CHECK-MVE-NEXT:    vmov.16 q4[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s13, s9
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vmov.16 q4[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s14, s10
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s6
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
+; CHECK-MVE-NEXT:    vmov.16 q4[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    it eq
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s11
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s15
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s15, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r0, #1
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
-; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s18
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q4[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
-; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
+; CHECK-MVE-NEXT:    vmov.16 q4[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q4
+; CHECK-MVE-NEXT:    vpop {d8, d9}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_oeq_v8f16_bc:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
+; CHECK-MVEFP-NEXT:    vmov.f16 r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f16 eq, q0, r0
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
-  %src2 = load half, half* %src2p
   %src2bc = bitcast half %src2 to i16
   %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
   %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
index 75147225afb0..0f7393c85d67 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -103,37 +103,28 @@ entry:
   ret <4 x float> %outbc
 }
 
-; TODO: Calling convention needs fixing to pass half types directly to functions
-define arm_aapcs_vfpcc <8 x half> @vdup_f16(half* %src1, half* %src2) {
+define arm_aapcs_vfpcc <8 x half> @vdup_f16(half %0, half %1) {
 ; CHECK-LABEL: vdup_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s0, [r1]
-; CHECK-NEXT:    vldr.16 s2, [r0]
-; CHECK-NEXT:    vadd.f16 s0, s2, s0
+; CHECK-NEXT:    vadd.f16 s0, s0, s1
 ; CHECK-NEXT:    vmov.f16 r0, s0
 ; CHECK-NEXT:    vdup.16 q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %0 = load half, half *%src1, align 2
-  %1 = load half, half *%src2, align 2
   %2 = fadd half %0, %1
   %3 = insertelement <8 x half> undef, half %2, i32 0
   %out = shufflevector <8 x half> %3, <8 x half> undef, <8 x i32> zeroinitializer
   ret <8 x half> %out
 }
 
-define arm_aapcs_vfpcc <8 x half> @vdup_f16_bc(half* %src1, half* %src2) {
+define arm_aapcs_vfpcc <8 x half> @vdup_f16_bc(half %0, half %1) {
 ; CHECK-LABEL: vdup_f16_bc:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s0, [r1]
-; CHECK-NEXT:    vldr.16 s2, [r0]
-; CHECK-NEXT:    vadd.f16 s0, s2, s0
+; CHECK-NEXT:    vadd.f16 s0, s0, s1
 ; CHECK-NEXT:    vmov.f16 r0, s0
 ; CHECK-NEXT:    vdup.16 q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %0 = load half, half *%src1, align 2
-  %1 = load half, half *%src2, align 2
   %2 = fadd half %0, %1
   %bc = bitcast half %2 to i16
   %3 = insertelement <8 x i16> undef, i16 %bc, i32 0
@@ -248,16 +239,12 @@ entry:
   ret float %ext
 }
 
-define arm_aapcs_vfpcc half @vdup_f16_extract(half* %src1, half* %src2) {
+define arm_aapcs_vfpcc half @vdup_f16_extract(half %0, half %1) {
 ; CHECK-LABEL: vdup_f16_extract:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s0, [r1]
-; CHECK-NEXT:    vldr.16 s2, [r0]
-; CHECK-NEXT:    vadd.f16 s0, s2, s0
+; CHECK-NEXT:    vadd.f16 s0, s0, s1
 ; CHECK-NEXT:    bx lr
 entry:
-  %0 = load half, half *%src1, align 2
-  %1 = load half, half *%src2, align 2
   %2 = fadd half %0, %1
   %bc = bitcast half %2 to i16
   %3 = insertelement <8 x i16> undef, i16 %bc, i32 0

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll
index 8ead4f5cfebf..c96baf10bc60 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll
@@ -60,91 +60,75 @@ entry:
   ret float %z
 }
 
-define arm_aapcs_vfpcc void @fadd_v2f16(<2 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fadd_v2f16(<2 x half> %x, half %y) {
 ; CHECK-LABEL: fadd_v2f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vadd.f16 s0, s0, s4
-; CHECK-NEXT:    vldr.16 s2, [r0]
-; CHECK-NEXT:    vadd.f16 s0, s2, s0
-; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    vmovx.f16 s6, s0
+; CHECK-NEXT:    vadd.f16 s0, s0, s6
+; CHECK-NEXT:    vadd.f16 s0, s4, s0
 ; CHECK-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fadd.f16.v2f16(half %y, <2 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
-define arm_aapcs_vfpcc void @fadd_v4f16(<4 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fadd_v4f16(<4 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fadd_v4f16:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vmovx.f16 s4, s1
-; CHECK-FP-NEXT:    vmovx.f16 s6, s0
+; CHECK-FP-NEXT:    vmovx.f16 s6, s1
+; CHECK-FP-NEXT:    vmovx.f16 s8, s0
+; CHECK-FP-NEXT:    vadd.f16 s6, s1, s6
+; CHECK-FP-NEXT:    vadd.f16 s0, s0, s8
 ; CHECK-FP-NEXT:    vadd.f16 s0, s0, s6
-; CHECK-FP-NEXT:    vadd.f16 s4, s1, s4
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vadd.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vadd.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vadd.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fadd_v4f16:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vadd.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vadd.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
-; CHECK-NOFP-NEXT:    vadd.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
+; CHECK-NOFP-NEXT:    vadd.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vadd.f16 s0, s6, s0
 ; CHECK-NOFP-NEXT:    vadd.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vadd.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fadd.f16.v4f16(half %y, <4 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
-define arm_aapcs_vfpcc void @fadd_v8f16(<8 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fadd_v8f16(<8 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fadd_v8f16:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vrev32.16 q1, q0
-; CHECK-FP-NEXT:    vadd.f16 q0, q0, q1
-; CHECK-FP-NEXT:    vadd.f16 s4, s2, s3
+; CHECK-FP-NEXT:    vrev32.16 q2, q0
+; CHECK-FP-NEXT:    vadd.f16 q0, q0, q2
+; CHECK-FP-NEXT:    vadd.f16 s6, s2, s3
 ; CHECK-FP-NEXT:    vadd.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vadd.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vadd.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vadd.f16 s0, s0, s6
+; CHECK-FP-NEXT:    vadd.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fadd_v8f16:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
-; CHECK-NOFP-NEXT:    vadd.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s1
+; CHECK-NOFP-NEXT:    vadd.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vadd.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vadd.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
-; CHECK-NOFP-NEXT:    vadd.f16 s4, s4, s2
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vadd.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vadd.f16 s4, s4, s3
+; CHECK-NOFP-NEXT:    vadd.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vadd.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s2
+; CHECK-NOFP-NEXT:    vadd.f16 s6, s6, s2
+; CHECK-NOFP-NEXT:    vadd.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vadd.f16 s6, s6, s3
+; CHECK-NOFP-NEXT:    vadd.f16 s0, s6, s0
 ; CHECK-NOFP-NEXT:    vadd.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vadd.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fadd.f16.v8f16(half %y, <8 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
-define arm_aapcs_vfpcc void @fadd_v16f16(<16 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fadd_v16f16(<16 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fadd_v16f16:
 ; CHECK-FP:       @ %bb.0: @ %entry
 ; CHECK-FP-NEXT:    vadd.f16 q0, q0, q1
@@ -152,46 +136,40 @@ define arm_aapcs_vfpcc void @fadd_v16f16(<16 x half> %x, half* %yy) {
 ; CHECK-FP-NEXT:    vadd.f16 q0, q0, q1
 ; CHECK-FP-NEXT:    vadd.f16 s4, s2, s3
 ; CHECK-FP-NEXT:    vadd.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
 ; CHECK-FP-NEXT:    vadd.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vadd.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vadd.f16 s0, s8, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fadd_v16f16:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
-; CHECK-NOFP-NEXT:    vadd.f16 s8, s10, s8
-; CHECK-NOFP-NEXT:    vadd.f16 s10, s0, s4
-; CHECK-NOFP-NEXT:    vadd.f16 s8, s10, s8
-; CHECK-NOFP-NEXT:    vadd.f16 s10, s1, s5
-; CHECK-NOFP-NEXT:    vadd.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
+; CHECK-NOFP-NEXT:    vmovx.f16 s10, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s0
 ; CHECK-NOFP-NEXT:    vadd.f16 s10, s12, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
-; CHECK-NOFP-NEXT:    vadd.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vadd.f16 s10, s2, s6
-; CHECK-NOFP-NEXT:    vadd.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
+; CHECK-NOFP-NEXT:    vadd.f16 s12, s0, s4
 ; CHECK-NOFP-NEXT:    vadd.f16 s10, s12, s10
+; CHECK-NOFP-NEXT:    vadd.f16 s12, s1, s5
+; CHECK-NOFP-NEXT:    vadd.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s5
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s1
+; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
+; CHECK-NOFP-NEXT:    vadd.f16 s12, s14, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s2
+; CHECK-NOFP-NEXT:    vadd.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vadd.f16 s12, s2, s6
+; CHECK-NOFP-NEXT:    vadd.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s6
+; CHECK-NOFP-NEXT:    vadd.f16 s12, s14, s12
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vadd.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vadd.f16 s10, s3, s7
-; CHECK-NOFP-NEXT:    vadd.f16 s8, s8, s10
+; CHECK-NOFP-NEXT:    vadd.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vadd.f16 s12, s3, s7
+; CHECK-NOFP-NEXT:    vadd.f16 s10, s10, s12
 ; CHECK-NOFP-NEXT:    vadd.f16 s0, s0, s4
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
+; CHECK-NOFP-NEXT:    vadd.f16 s0, s10, s0
 ; CHECK-NOFP-NEXT:    vadd.f16 s0, s8, s0
-; CHECK-NOFP-NEXT:    vadd.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fadd.f16.v16f16(half %y, <16 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
 define arm_aapcs_vfpcc double @fadd_v1f64(<1 x double> %x, double %y) {
@@ -269,60 +247,51 @@ entry:
   ret float %z
 }
 
-define arm_aapcs_vfpcc void @fadd_v4f16_nofast(<4 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fadd_v4f16_nofast(<4 x half> %x, half %y) {
 ; CHECK-LABEL: fadd_v4f16_nofast:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s4, [r0]
-; CHECK-NEXT:    vmovx.f16 s6, s0
 ; CHECK-NEXT:    vadd.f16 s4, s4, s0
-; CHECK-NEXT:    vmovx.f16 s0, s1
+; CHECK-NEXT:    vmovx.f16 s6, s0
 ; CHECK-NEXT:    vadd.f16 s4, s4, s6
+; CHECK-NEXT:    vmovx.f16 s0, s1
 ; CHECK-NEXT:    vadd.f16 s4, s4, s1
 ; CHECK-NEXT:    vadd.f16 s0, s4, s0
-; CHECK-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fadd.f16.v4f16(half %y, <4 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
-define arm_aapcs_vfpcc void @fadd_v8f16_nofast(<8 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fadd_v8f16_nofast(<8 x half> %x, half %y) {
 ; CHECK-LABEL: fadd_v8f16_nofast:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s4, [r0]
-; CHECK-NEXT:    vmovx.f16 s6, s0
 ; CHECK-NEXT:    vadd.f16 s4, s4, s0
-; CHECK-NEXT:    vmovx.f16 s0, s3
+; CHECK-NEXT:    vmovx.f16 s6, s0
 ; CHECK-NEXT:    vadd.f16 s4, s4, s6
 ; CHECK-NEXT:    vmovx.f16 s6, s1
 ; CHECK-NEXT:    vadd.f16 s4, s4, s1
+; CHECK-NEXT:    vmovx.f16 s0, s3
 ; CHECK-NEXT:    vadd.f16 s4, s4, s6
 ; CHECK-NEXT:    vmovx.f16 s6, s2
 ; CHECK-NEXT:    vadd.f16 s4, s4, s2
 ; CHECK-NEXT:    vadd.f16 s4, s4, s6
 ; CHECK-NEXT:    vadd.f16 s4, s4, s3
 ; CHECK-NEXT:    vadd.f16 s0, s4, s0
-; CHECK-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fadd.f16.v8f16(half %y, <8 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
-define arm_aapcs_vfpcc void @fadd_v16f16_nofast(<16 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fadd_v16f16_nofast(<16 x half> %x, half %y) {
 ; CHECK-LABEL: fadd_v16f16_nofast:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s8, [r0]
-; CHECK-NEXT:    vmovx.f16 s10, s0
 ; CHECK-NEXT:    vadd.f16 s8, s8, s0
-; CHECK-NEXT:    vmovx.f16 s0, s3
+; CHECK-NEXT:    vmovx.f16 s10, s0
 ; CHECK-NEXT:    vadd.f16 s8, s8, s10
 ; CHECK-NEXT:    vmovx.f16 s10, s1
 ; CHECK-NEXT:    vadd.f16 s8, s8, s1
+; CHECK-NEXT:    vmovx.f16 s0, s3
 ; CHECK-NEXT:    vadd.f16 s8, s8, s10
 ; CHECK-NEXT:    vmovx.f16 s10, s2
 ; CHECK-NEXT:    vadd.f16 s8, s8, s2
@@ -341,13 +310,10 @@ define arm_aapcs_vfpcc void @fadd_v16f16_nofast(<16 x half> %x, half* %yy) {
 ; CHECK-NEXT:    vmovx.f16 s2, s7
 ; CHECK-NEXT:    vadd.f16 s0, s0, s7
 ; CHECK-NEXT:    vadd.f16 s0, s0, s2
-; CHECK-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fadd.f16.v16f16(half %y, <16 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
 define arm_aapcs_vfpcc double @fadd_v1f64_nofast(<1 x double> %x, double %y) {

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
index 45c6972312e3..07a0077b0930 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
@@ -459,97 +459,81 @@ entry:
   ret float %r
 }
 
-define arm_aapcs_vfpcc void @fmin_v4f16_acc(<4 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmin_v4f16_acc(<4 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmin_v4f16_acc:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vmovx.f16 s4, s1
-; CHECK-FP-NEXT:    vmovx.f16 s6, s0
+; CHECK-FP-NEXT:    vmovx.f16 s6, s1
+; CHECK-FP-NEXT:    vmovx.f16 s8, s0
+; CHECK-FP-NEXT:    vminnm.f16 s6, s1, s6
+; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s8
 ; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
-; CHECK-FP-NEXT:    vminnm.f16 s4, s1, s4
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vminnm.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vminnm.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmin_v4f16_acc:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vminnm.f16 s0, s6, s0
 ; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vminnm.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
-define arm_aapcs_vfpcc void @fmin_v2f16_acc(<2 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmin_v2f16_acc(<2 x half> %x, half %y) {
 ; CHECK-LABEL: fmin_v2f16_acc:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vminnm.f16 s0, s0, s4
-; CHECK-NEXT:    vldr.16 s2, [r0]
-; CHECK-NEXT:    vminnm.f16 s0, s2, s0
-; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    vmovx.f16 s6, s0
+; CHECK-NEXT:    vminnm.f16 s0, s0, s6
+; CHECK-NEXT:    vminnm.f16 s0, s4, s0
 ; CHECK-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmin.v2f16(<2 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
-define arm_aapcs_vfpcc void @fmin_v8f16_acc(<8 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmin_v8f16_acc(<8 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmin_v8f16_acc:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vrev32.16 q1, q0
-; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
-; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
+; CHECK-FP-NEXT:    vrev32.16 q2, q0
+; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q2
+; CHECK-FP-NEXT:    vminnm.f16 s6, s2, s3
 ; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vminnm.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
+; CHECK-FP-NEXT:    vminnm.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmin_v8f16_acc:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s1
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s2
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s3
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s2
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s2
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s3
+; CHECK-NOFP-NEXT:    vminnm.f16 s0, s6, s0
 ; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vminnm.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
-define arm_aapcs_vfpcc void @fmin_v16f16_acc(<16 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmin_v16f16_acc(<16 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmin_v16f16_acc:
 ; CHECK-FP:       @ %bb.0: @ %entry
 ; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
@@ -557,64 +541,58 @@ define arm_aapcs_vfpcc void @fmin_v16f16_acc(<16 x half> %x, half* %yy) {
 ; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
 ; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
 ; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
 ; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vminnm.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vminnm.f16 s0, s8, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmin_v16f16_acc:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
-; CHECK-NOFP-NEXT:    vcmp.f16 s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
+; CHECK-NOFP-NEXT:    vmovx.f16 s10, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s0
+; CHECK-NOFP-NEXT:    vcmp.f16 s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s1
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
-; CHECK-NOFP-NEXT:    vselgt.f16 s8, s10, s8
+; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NOFP-NEXT:    vcmp.f16 s5, s1
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s0, s4
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s0, s4
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
+; CHECK-NOFP-NEXT:    vminnm.f16 s10, s12, s10
 ; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s1, s5
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
-; CHECK-NOFP-NEXT:    vcmp.f16 s10, s12
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s1, s5
+; CHECK-NOFP-NEXT:    vminnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s5
+; CHECK-NOFP-NEXT:    vcmp.f16 s12, s14
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NOFP-NEXT:    vcmp.f16 s6, s2
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s14, s12
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s2, s6
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
-; CHECK-NOFP-NEXT:    vcmp.f16 s10, s12
+; CHECK-NOFP-NEXT:    vminnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s2
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s2, s6
+; CHECK-NOFP-NEXT:    vminnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s6
+; CHECK-NOFP-NEXT:    vcmp.f16 s12, s14
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NOFP-NEXT:    vcmp.f16 s7, s3
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s14, s12
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s3, s7
+; CHECK-NOFP-NEXT:    vminnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s3, s7
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
+; CHECK-NOFP-NEXT:    vminnm.f16 s10, s10, s12
 ; CHECK-NOFP-NEXT:    vselgt.f16 s0, s0, s4
+; CHECK-NOFP-NEXT:    vminnm.f16 s0, s10, s0
 ; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
-; CHECK-NOFP-NEXT:    vminnm.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
 define arm_aapcs_vfpcc double @fmin_v1f64_acc(<1 x double> %x, double %y) {
@@ -735,87 +713,75 @@ entry:
   ret float %r
 }
 
-define arm_aapcs_vfpcc void @fmin_v4f16_acc_nofast(<4 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmin_v4f16_acc_nofast(<4 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmin_v4f16_acc_nofast:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vmovx.f16 s4, s1
-; CHECK-FP-NEXT:    vmovx.f16 s6, s0
+; CHECK-FP-NEXT:    vmovx.f16 s6, s1
+; CHECK-FP-NEXT:    vmovx.f16 s8, s0
+; CHECK-FP-NEXT:    vminnm.f16 s6, s1, s6
+; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s8
 ; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
-; CHECK-FP-NEXT:    vminnm.f16 s4, s1, s4
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vcmp.f16 s0, s2
+; CHECK-FP-NEXT:    vcmp.f16 s0, s4
 ; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vselgt.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmin_v4f16_acc_nofast:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vcmp.f16 s0, s2
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vminnm.f16 s0, s6, s0
+; CHECK-NOFP-NEXT:    vcmp.f16 s0, s4
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
+; CHECK-NOFP-NEXT:    vselgt.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
   %c = fcmp olt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
-define arm_aapcs_vfpcc void @fmin_v8f16_acc_nofast(<8 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmin_v8f16_acc_nofast(<8 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmin_v8f16_acc_nofast:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vrev32.16 q1, q0
-; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
-; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
+; CHECK-FP-NEXT:    vrev32.16 q2, q0
+; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q2
+; CHECK-FP-NEXT:    vminnm.f16 s6, s2, s3
 ; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vcmp.f16 s0, s2
+; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
+; CHECK-FP-NEXT:    vcmp.f16 s0, s4
 ; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vselgt.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmin_v8f16_acc_nofast:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s1
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s2
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s3
-; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vcmp.f16 s0, s2
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s2
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s2
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vminnm.f16 s6, s6, s3
+; CHECK-NOFP-NEXT:    vminnm.f16 s0, s6, s0
+; CHECK-NOFP-NEXT:    vcmp.f16 s0, s4
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
+; CHECK-NOFP-NEXT:    vselgt.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
   %c = fcmp olt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
-define arm_aapcs_vfpcc void @fmin_v16f16_acc_nofast(<16 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmin_v16f16_acc_nofast(<16 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmin_v16f16_acc_nofast:
 ; CHECK-FP:       @ %bb.0: @ %entry
 ; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
@@ -823,52 +789,46 @@ define arm_aapcs_vfpcc void @fmin_v16f16_acc_nofast(<16 x half> %x, half* %yy) {
 ; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
 ; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
 ; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
 ; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vcmp.f16 s0, s2
+; CHECK-FP-NEXT:    vcmp.f16 s0, s8
 ; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vselgt.f16 s0, s8, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmin_v16f16_acc_nofast:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
-; CHECK-NOFP-NEXT:    vminnm.f16 s10, s0, s4
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
-; CHECK-NOFP-NEXT:    vminnm.f16 s10, s1, s5
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
+; CHECK-NOFP-NEXT:    vmovx.f16 s10, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s0
 ; CHECK-NOFP-NEXT:    vminnm.f16 s10, s12, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vminnm.f16 s10, s2, s6
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
+; CHECK-NOFP-NEXT:    vminnm.f16 s12, s0, s4
 ; CHECK-NOFP-NEXT:    vminnm.f16 s10, s12, s10
+; CHECK-NOFP-NEXT:    vminnm.f16 s12, s1, s5
+; CHECK-NOFP-NEXT:    vminnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s5
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s1
+; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
+; CHECK-NOFP-NEXT:    vminnm.f16 s12, s14, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s2
+; CHECK-NOFP-NEXT:    vminnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vminnm.f16 s12, s2, s6
+; CHECK-NOFP-NEXT:    vminnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s6
+; CHECK-NOFP-NEXT:    vminnm.f16 s12, s14, s12
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vminnm.f16 s10, s3, s7
-; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
+; CHECK-NOFP-NEXT:    vminnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vminnm.f16 s12, s3, s7
+; CHECK-NOFP-NEXT:    vminnm.f16 s10, s10, s12
 ; CHECK-NOFP-NEXT:    vminnm.f16 s0, s0, s4
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
-; CHECK-NOFP-NEXT:    vcmp.f16 s0, s2
+; CHECK-NOFP-NEXT:    vminnm.f16 s0, s10, s0
+; CHECK-NOFP-NEXT:    vcmp.f16 s0, s8
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
+; CHECK-NOFP-NEXT:    vselgt.f16 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
   %c = fcmp olt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
 define arm_aapcs_vfpcc double @fmin_v1f64_acc_nofast(<1 x double> %x, double %y) {
@@ -1373,97 +1333,81 @@ entry:
   ret float %r
 }
 
-define arm_aapcs_vfpcc void @fmax_v2f16_acc(<2 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmax_v2f16_acc(<2 x half> %x, half %y) {
 ; CHECK-LABEL: fmax_v2f16_acc:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vmaxnm.f16 s0, s0, s4
-; CHECK-NEXT:    vldr.16 s2, [r0]
-; CHECK-NEXT:    vmaxnm.f16 s0, s2, s0
-; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    vmovx.f16 s6, s0
+; CHECK-NEXT:    vmaxnm.f16 s0, s0, s6
+; CHECK-NEXT:    vmaxnm.f16 s0, s4, s0
 ; CHECK-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmax.v2f16(<2 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
-define arm_aapcs_vfpcc void @fmax_v4f16_acc(<4 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmax_v4f16_acc(<4 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmax_v4f16_acc:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vmovx.f16 s4, s1
-; CHECK-FP-NEXT:    vmovx.f16 s6, s0
+; CHECK-FP-NEXT:    vmovx.f16 s6, s1
+; CHECK-FP-NEXT:    vmovx.f16 s8, s0
+; CHECK-FP-NEXT:    vmaxnm.f16 s6, s1, s6
+; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s8
 ; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s6
-; CHECK-FP-NEXT:    vmaxnm.f16 s4, s1, s4
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vmaxnm.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vmaxnm.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmax_v4f16_acc:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s6, s0
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
-define arm_aapcs_vfpcc void @fmax_v8f16_acc(<8 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmax_v8f16_acc(<8 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmax_v8f16_acc:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vrev32.16 q1, q0
-; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
-; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
+; CHECK-FP-NEXT:    vrev32.16 q2, q0
+; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q2
+; CHECK-FP-NEXT:    vmaxnm.f16 s6, s2, s3
 ; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vmaxnm.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s6
+; CHECK-FP-NEXT:    vmaxnm.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmax_v8f16_acc:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s1
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s2
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s3
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s2
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s2
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s3
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s6, s0
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
-define arm_aapcs_vfpcc void @fmax_v16f16_acc(<16 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmax_v16f16_acc(<16 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmax_v16f16_acc:
 ; CHECK-FP:       @ %bb.0: @ %entry
 ; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
@@ -1471,64 +1415,58 @@ define arm_aapcs_vfpcc void @fmax_v16f16_acc(<16 x half> %x, half* %yy) {
 ; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
 ; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
 ; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
 ; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vmaxnm.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vmaxnm.f16 s0, s8, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmax_v16f16_acc:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
-; CHECK-NOFP-NEXT:    vcmp.f16 s10, s8
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
+; CHECK-NOFP-NEXT:    vmovx.f16 s10, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s0
+; CHECK-NOFP-NEXT:    vcmp.f16 s12, s10
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s1
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NOFP-NEXT:    vcmp.f16 s0, s4
-; CHECK-NOFP-NEXT:    vselgt.f16 s8, s10, s8
+; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NOFP-NEXT:    vcmp.f16 s1, s5
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s0, s4
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s0, s4
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s10, s8
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s12, s10
 ; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s1, s5
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
-; CHECK-NOFP-NEXT:    vcmp.f16 s12, s10
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s1, s5
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s5
+; CHECK-NOFP-NEXT:    vcmp.f16 s14, s12
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NOFP-NEXT:    vcmp.f16 s2, s6
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s14, s12
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s2, s6
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
-; CHECK-NOFP-NEXT:    vcmp.f16 s12, s10
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s2
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s2, s6
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s6
+; CHECK-NOFP-NEXT:    vcmp.f16 s14, s12
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NOFP-NEXT:    vcmp.f16 s3, s7
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s14, s12
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NOFP-NEXT:    vcmp.f16 s0, s4
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vselgt.f16 s10, s3, s7
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vselgt.f16 s12, s3, s7
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s10, s12
 ; CHECK-NOFP-NEXT:    vselgt.f16 s0, s0, s4
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s10, s0
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s8, s0
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
 define arm_aapcs_vfpcc double @fmax_v1f64_acc(<1 x double> %x, double %y) {
@@ -1649,87 +1587,75 @@ entry:
   ret float %r
 }
 
-define arm_aapcs_vfpcc void @fmax_v4f16_acc_nofast(<4 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmax_v4f16_acc_nofast(<4 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmax_v4f16_acc_nofast:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vmovx.f16 s4, s1
-; CHECK-FP-NEXT:    vmovx.f16 s6, s0
+; CHECK-FP-NEXT:    vmovx.f16 s6, s1
+; CHECK-FP-NEXT:    vmovx.f16 s8, s0
+; CHECK-FP-NEXT:    vmaxnm.f16 s6, s1, s6
+; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s8
 ; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s6
-; CHECK-FP-NEXT:    vmaxnm.f16 s4, s1, s4
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vcmp.f16 s2, s0
+; CHECK-FP-NEXT:    vcmp.f16 s4, s0
 ; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vselgt.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmax_v4f16_acc_nofast:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vcmp.f16 s2, s0
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s6, s0
+; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
+; CHECK-NOFP-NEXT:    vselgt.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
   %c = fcmp ogt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
-define arm_aapcs_vfpcc void @fmax_v8f16_acc_nofast(<8 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmax_v8f16_acc_nofast(<8 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmax_v8f16_acc_nofast:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vrev32.16 q1, q0
-; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
-; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
+; CHECK-FP-NEXT:    vrev32.16 q2, q0
+; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q2
+; CHECK-FP-NEXT:    vmaxnm.f16 s6, s2, s3
 ; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vcmp.f16 s2, s0
+; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s6
+; CHECK-FP-NEXT:    vcmp.f16 s4, s0
 ; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vselgt.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmax_v8f16_acc_nofast:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s1
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s2
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s3
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vcmp.f16 s2, s0
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s2
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s2
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s6, s6, s3
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s6, s0
+; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
+; CHECK-NOFP-NEXT:    vselgt.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
   %c = fcmp ogt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
-define arm_aapcs_vfpcc void @fmax_v16f16_acc_nofast(<16 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmax_v16f16_acc_nofast(<16 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmax_v16f16_acc_nofast:
 ; CHECK-FP:       @ %bb.0: @ %entry
 ; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
@@ -1737,52 +1663,46 @@ define arm_aapcs_vfpcc void @fmax_v16f16_acc_nofast(<16 x half> %x, half* %yy) {
 ; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
 ; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
 ; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
 ; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vcmp.f16 s2, s0
+; CHECK-FP-NEXT:    vcmp.f16 s8, s0
 ; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vselgt.f16 s0, s8, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmax_v16f16_acc_nofast:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s10, s8
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s0, s4
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s10, s8
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s1, s5
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
+; CHECK-NOFP-NEXT:    vmovx.f16 s10, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s0
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s12, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s2, s6
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s12, s0, s4
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s12, s10
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s12, s1, s5
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s5
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s1
+; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s12, s14, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s2
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s12, s2, s6
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s6
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s12, s14, s12
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s3, s7
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s12, s3, s7
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s10, s12
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s0, s4
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s8, s0
-; CHECK-NOFP-NEXT:    vcmp.f16 s2, s0
+; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s10, s0
+; CHECK-NOFP-NEXT:    vcmp.f16 s8, s0
 ; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
+; CHECK-NOFP-NEXT:    vselgt.f16 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
   %c = fcmp ogt half %y, %z
   %r = select i1 %c, half %y, half %z
-  store half %r, half* %yy
-  ret void
+  ret half %r
 }
 
 define arm_aapcs_vfpcc double @fmax_v1f64_acc_nofast(<1 x double> %x, double %y) {

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll
index 940e2e2f4a0c..bbc30d99d10c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll
@@ -60,91 +60,75 @@ entry:
   ret float %z
 }
 
-define arm_aapcs_vfpcc void @fmul_v2f16(<2 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmul_v2f16(<2 x half> %x, half %y) {
 ; CHECK-LABEL: fmul_v2f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vmul.f16 s0, s0, s4
-; CHECK-NEXT:    vldr.16 s2, [r0]
-; CHECK-NEXT:    vmul.f16 s0, s2, s0
-; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    vmovx.f16 s6, s0
+; CHECK-NEXT:    vmul.f16 s0, s0, s6
+; CHECK-NEXT:    vmul.f16 s0, s4, s0
 ; CHECK-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmul.f16.v2f16(half %y, <2 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
-define arm_aapcs_vfpcc void @fmul_v4f16(<4 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmul_v4f16(<4 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmul_v4f16:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vmovx.f16 s4, s1
-; CHECK-FP-NEXT:    vmovx.f16 s6, s0
+; CHECK-FP-NEXT:    vmovx.f16 s6, s1
+; CHECK-FP-NEXT:    vmovx.f16 s8, s0
+; CHECK-FP-NEXT:    vmul.f16 s6, s1, s6
+; CHECK-FP-NEXT:    vmul.f16 s0, s0, s8
 ; CHECK-FP-NEXT:    vmul.f16 s0, s0, s6
-; CHECK-FP-NEXT:    vmul.f16 s4, s1, s4
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vmul.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vmul.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vmul.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmul_v4f16:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vmul.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vmul.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
-; CHECK-NOFP-NEXT:    vmul.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
+; CHECK-NOFP-NEXT:    vmul.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vmul.f16 s0, s6, s0
 ; CHECK-NOFP-NEXT:    vmul.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vmul.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmul.f16.v4f16(half %y, <4 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
-define arm_aapcs_vfpcc void @fmul_v8f16(<8 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmul_v8f16(<8 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmul_v8f16:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vrev32.16 q1, q0
-; CHECK-FP-NEXT:    vmul.f16 q0, q0, q1
-; CHECK-FP-NEXT:    vmul.f16 s4, s2, s3
+; CHECK-FP-NEXT:    vrev32.16 q2, q0
+; CHECK-FP-NEXT:    vmul.f16 q0, q0, q2
+; CHECK-FP-NEXT:    vmul.f16 s6, s2, s3
 ; CHECK-FP-NEXT:    vmul.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
-; CHECK-FP-NEXT:    vmul.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vmul.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vmul.f16 s0, s0, s6
+; CHECK-FP-NEXT:    vmul.f16 s0, s4, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmul_v8f16:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
-; CHECK-NOFP-NEXT:    vmul.f16 s4, s0, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s6, s0
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s1
+; CHECK-NOFP-NEXT:    vmul.f16 s6, s0, s6
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vmul.f16 s4, s4, s1
-; CHECK-NOFP-NEXT:    vmul.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
-; CHECK-NOFP-NEXT:    vmul.f16 s4, s4, s2
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
-; CHECK-NOFP-NEXT:    vmul.f16 s4, s4, s6
-; CHECK-NOFP-NEXT:    vmul.f16 s4, s4, s3
+; CHECK-NOFP-NEXT:    vmul.f16 s6, s6, s1
+; CHECK-NOFP-NEXT:    vmul.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vmovx.f16 s8, s2
+; CHECK-NOFP-NEXT:    vmul.f16 s6, s6, s2
+; CHECK-NOFP-NEXT:    vmul.f16 s6, s6, s8
+; CHECK-NOFP-NEXT:    vmul.f16 s6, s6, s3
+; CHECK-NOFP-NEXT:    vmul.f16 s0, s6, s0
 ; CHECK-NOFP-NEXT:    vmul.f16 s0, s4, s0
-; CHECK-NOFP-NEXT:    vmul.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmul.f16.v8f16(half %y, <8 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
-define arm_aapcs_vfpcc void @fmul_v16f16(<16 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmul_v16f16(<16 x half> %x, half %y) {
 ; CHECK-FP-LABEL: fmul_v16f16:
 ; CHECK-FP:       @ %bb.0: @ %entry
 ; CHECK-FP-NEXT:    vmul.f16 q0, q0, q1
@@ -152,46 +136,40 @@ define arm_aapcs_vfpcc void @fmul_v16f16(<16 x half> %x, half* %yy) {
 ; CHECK-FP-NEXT:    vmul.f16 q0, q0, q1
 ; CHECK-FP-NEXT:    vmul.f16 s4, s2, s3
 ; CHECK-FP-NEXT:    vmul.f16 s0, s0, s1
-; CHECK-FP-NEXT:    vldr.16 s2, [r0]
 ; CHECK-FP-NEXT:    vmul.f16 s0, s0, s4
-; CHECK-FP-NEXT:    vmul.f16 s0, s2, s0
-; CHECK-FP-NEXT:    vstr.16 s0, [r0]
+; CHECK-FP-NEXT:    vmul.f16 s0, s8, s0
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-NOFP-LABEL: fmul_v16f16:
 ; CHECK-NOFP:       @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
-; CHECK-NOFP-NEXT:    vmul.f16 s8, s10, s8
-; CHECK-NOFP-NEXT:    vmul.f16 s10, s0, s4
-; CHECK-NOFP-NEXT:    vmul.f16 s8, s10, s8
-; CHECK-NOFP-NEXT:    vmul.f16 s10, s1, s5
-; CHECK-NOFP-NEXT:    vmul.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
-; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
+; CHECK-NOFP-NEXT:    vmovx.f16 s10, s4
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s0
 ; CHECK-NOFP-NEXT:    vmul.f16 s10, s12, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
-; CHECK-NOFP-NEXT:    vmul.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmul.f16 s10, s2, s6
-; CHECK-NOFP-NEXT:    vmul.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
+; CHECK-NOFP-NEXT:    vmul.f16 s12, s0, s4
 ; CHECK-NOFP-NEXT:    vmul.f16 s10, s12, s10
+; CHECK-NOFP-NEXT:    vmul.f16 s12, s1, s5
+; CHECK-NOFP-NEXT:    vmul.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s5
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s1
+; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
+; CHECK-NOFP-NEXT:    vmul.f16 s12, s14, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s14, s2
+; CHECK-NOFP-NEXT:    vmul.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmul.f16 s12, s2, s6
+; CHECK-NOFP-NEXT:    vmul.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmovx.f16 s12, s6
+; CHECK-NOFP-NEXT:    vmul.f16 s12, s14, s12
 ; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
-; CHECK-NOFP-NEXT:    vmul.f16 s8, s8, s10
-; CHECK-NOFP-NEXT:    vmul.f16 s10, s3, s7
-; CHECK-NOFP-NEXT:    vmul.f16 s8, s8, s10
+; CHECK-NOFP-NEXT:    vmul.f16 s10, s10, s12
+; CHECK-NOFP-NEXT:    vmul.f16 s12, s3, s7
+; CHECK-NOFP-NEXT:    vmul.f16 s10, s10, s12
 ; CHECK-NOFP-NEXT:    vmul.f16 s0, s0, s4
-; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
+; CHECK-NOFP-NEXT:    vmul.f16 s0, s10, s0
 ; CHECK-NOFP-NEXT:    vmul.f16 s0, s8, s0
-; CHECK-NOFP-NEXT:    vmul.f16 s0, s2, s0
-; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call fast half @llvm.vector.reduce.fmul.f16.v16f16(half %y, <16 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
 define arm_aapcs_vfpcc double @fmul_v1f64(<1 x double> %x, double %y) {
@@ -269,76 +247,63 @@ entry:
   ret float %z
 }
 
-define arm_aapcs_vfpcc void @fmul_v2f16_nofast(<2 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmul_v2f16_nofast(<2 x half> %x, half %y) {
 ; CHECK-LABEL: fmul_v2f16_nofast:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s4, [r0]
 ; CHECK-NEXT:    vmul.f16 s4, s4, s0
 ; CHECK-NEXT:    vmovx.f16 s0, s0
 ; CHECK-NEXT:    vmul.f16 s0, s4, s0
-; CHECK-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fmul.f16.v2f16(half %y, <2 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
-define arm_aapcs_vfpcc void @fmul_v4f16_nofast(<4 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmul_v4f16_nofast(<4 x half> %x, half %y) {
 ; CHECK-LABEL: fmul_v4f16_nofast:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s4, [r0]
-; CHECK-NEXT:    vmovx.f16 s6, s0
 ; CHECK-NEXT:    vmul.f16 s4, s4, s0
-; CHECK-NEXT:    vmovx.f16 s0, s1
+; CHECK-NEXT:    vmovx.f16 s6, s0
 ; CHECK-NEXT:    vmul.f16 s4, s4, s6
+; CHECK-NEXT:    vmovx.f16 s0, s1
 ; CHECK-NEXT:    vmul.f16 s4, s4, s1
 ; CHECK-NEXT:    vmul.f16 s0, s4, s0
-; CHECK-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fmul.f16.v4f16(half %y, <4 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
-define arm_aapcs_vfpcc void @fmul_v8f16_nofast(<8 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmul_v8f16_nofast(<8 x half> %x, half %y) {
 ; CHECK-LABEL: fmul_v8f16_nofast:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s4, [r0]
-; CHECK-NEXT:    vmovx.f16 s6, s0
 ; CHECK-NEXT:    vmul.f16 s4, s4, s0
-; CHECK-NEXT:    vmovx.f16 s0, s3
+; CHECK-NEXT:    vmovx.f16 s6, s0
 ; CHECK-NEXT:    vmul.f16 s4, s4, s6
 ; CHECK-NEXT:    vmovx.f16 s6, s1
 ; CHECK-NEXT:    vmul.f16 s4, s4, s1
+; CHECK-NEXT:    vmovx.f16 s0, s3
 ; CHECK-NEXT:    vmul.f16 s4, s4, s6
 ; CHECK-NEXT:    vmovx.f16 s6, s2
 ; CHECK-NEXT:    vmul.f16 s4, s4, s2
 ; CHECK-NEXT:    vmul.f16 s4, s4, s6
 ; CHECK-NEXT:    vmul.f16 s4, s4, s3
 ; CHECK-NEXT:    vmul.f16 s0, s4, s0
-; CHECK-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fmul.f16.v8f16(half %y, <8 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
-define arm_aapcs_vfpcc void @fmul_v16f16_nofast(<16 x half> %x, half* %yy) {
+define arm_aapcs_vfpcc half @fmul_v16f16_nofast(<16 x half> %x, half %y) {
 ; CHECK-LABEL: fmul_v16f16_nofast:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr.16 s8, [r0]
-; CHECK-NEXT:    vmovx.f16 s10, s0
 ; CHECK-NEXT:    vmul.f16 s8, s8, s0
-; CHECK-NEXT:    vmovx.f16 s0, s3
+; CHECK-NEXT:    vmovx.f16 s10, s0
 ; CHECK-NEXT:    vmul.f16 s8, s8, s10
 ; CHECK-NEXT:    vmovx.f16 s10, s1
 ; CHECK-NEXT:    vmul.f16 s8, s8, s1
+; CHECK-NEXT:    vmovx.f16 s0, s3
 ; CHECK-NEXT:    vmul.f16 s8, s8, s10
 ; CHECK-NEXT:    vmovx.f16 s10, s2
 ; CHECK-NEXT:    vmul.f16 s8, s8, s2
@@ -357,13 +322,10 @@ define arm_aapcs_vfpcc void @fmul_v16f16_nofast(<16 x half> %x, half* %yy) {
 ; CHECK-NEXT:    vmovx.f16 s2, s7
 ; CHECK-NEXT:    vmul.f16 s0, s0, s7
 ; CHECK-NEXT:    vmul.f16 s0, s0, s2
-; CHECK-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
-  %y = load half, half* %yy
   %z = call half @llvm.vector.reduce.fmul.f16.v16f16(half %y, <16 x half> %x)
-  store half %z, half* %yy
-  ret void
+  ret half %z
 }
 
 define arm_aapcs_vfpcc double @fmul_v1f64_nofast(<1 x double> %x, double %y) {


        


More information about the llvm-branch-commits mailing list