[llvm] [LegalizeDAG][RISCV] Don't promote f16 vector ISD::FNEG/FABS/FCOPYSIGN to f32 when we don't have Zvfh. (PR #106652)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 3 20:53:50 PDT 2024


https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/106652

>From 83975c9adf3181fed7e4a8050d83e6e17f836360 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 29 Aug 2024 18:37:06 -0700
Subject: [PATCH 1/3] [LegalizeDAG][RISCV] Don't promote f16 vector
 ISD::FNEG/FABS/FCOPYSIGN to f32 when we don't have Zvfh.

The fp_extend will canonicalize NaNs, which does not match the semantics of
FNEG/FABS/FCOPYSIGN.

For fixed vectors I'm scalarizing due to test changes on other targets
where scalarization is expected. I will try to address this in a follow-up.

For scalable vectors, we bitcast to integer and use integer logic ops.
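
As a rough scalar sketch (an illustration, not the patch itself) of what the
integer expansion computes per f16 element, assuming the standard IEEE-754
binary16 layout with the sign bit at 0x8000:

  #include <cstdint>

  // Pure bit manipulation; the value is never interpreted as a float, so
  // sNaN payloads pass through untouched.
  uint16_t fneg_h(uint16_t X) { return X ^ 0x8000; }  // flip the sign bit
  uint16_t fabs_h(uint16_t X) { return X & 0x7FFF; }  // clear the sign bit
  uint16_t copysign_h(uint16_t Mag, uint16_t Sign) {
    return (Mag & 0x7FFF) | (Sign & 0x8000);          // splice in the sign bit
  }

Routing the same operations through fp_extend/fp_round would quiet a signaling
NaN input, which is exactly the behavior change this patch avoids.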
---
 .../SelectionDAG/LegalizeVectorOps.cpp        |   58 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |    9 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     | 5756 +++++++++++++++--
 llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll   |   65 +-
 .../CodeGen/RISCV/rvv/vfcopysign-sdnode.ll    |  647 +-
 .../RISCV/rvv/vfmsub-constrained-sdnode.ll    |  275 +-
 llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll   |   59 +-
 .../RISCV/rvv/vfnmadd-constrained-sdnode.ll   |  424 +-
 .../RISCV/rvv/vfnmsub-constrained-sdnode.ll   |  341 +-
 9 files changed, 5873 insertions(+), 1761 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2557fa288606e7..b551462831acef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -139,6 +139,8 @@ class VectorLegalizer {
   std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
   SDValue ExpandStore(SDNode *N);
   SDValue ExpandFNEG(SDNode *Node);
+  SDValue ExpandFABS(SDNode *Node);
+  SDValue ExpandFCOPYSIGN(SDNode *Node);
   void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
   void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
   void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -913,6 +915,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
   case ISD::FNEG:
     Results.push_back(ExpandFNEG(Node));
     return;
+  case ISD::FABS:
+    Results.push_back(ExpandFABS(Node));
+    return;
+  case ISD::FCOPYSIGN:
+    Results.push_back(ExpandFCOPYSIGN(Node));
+    return;
   case ISD::FSUB:
     ExpandFSUB(Node, Results);
     return;
@@ -1674,7 +1682,7 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
 
   // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
   if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) &&
-      TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) {
+      (TLI.isOperationLegalOrCustom(ISD::FSUB, VT) || VT.isScalableVector())) {
     SDLoc DL(Node);
     SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
     SDValue SignMask = DAG.getConstant(
@@ -1685,6 +1693,54 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
   return DAG.UnrollVectorOp(Node);
 }
 
+SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
+  EVT VT = Node->getValueType(0);
+  EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+  // FIXME: We shouldn't restrict this to scalable vectors.
+  if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) &&
+      VT.isScalableVector()) {
+    SDLoc DL(Node);
+    SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+    SDValue ClearSignMask = DAG.getConstant(
+        APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+    SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
+    return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
+  }
+  return DAG.UnrollVectorOp(Node);
+}
+
+SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
+  EVT VT = Node->getValueType(0);
+  EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+  // FIXME: We shouldn't restrict this to scalable vectors.
+  if (VT == Node->getOperand(1).getValueType() &&
+      TLI.isOperationLegalOrCustom(ISD::AND, IntVT) &&
+      TLI.isOperationLegalOrCustom(ISD::OR, IntVT) &&
+      VT.isScalableVector()) {
+    SDLoc DL(Node);
+    SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+    SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
+
+    SDValue SignMask = DAG.getConstant(
+        APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+    SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
+
+    SDValue ClearSignMask = DAG.getConstant(
+        APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+    SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
+
+    SDNodeFlags Flags;
+    Flags.setDisjoint(true);
+
+    SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
+
+    return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
+  }
+  return DAG.UnrollVectorOp(Node);
+}
+
 void VectorLegalizer::ExpandFSUB(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
   // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 09928dcc1f489a..cddd65f58baba8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -884,7 +884,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     static const unsigned ZvfhminPromoteOps[] = {
         ISD::FMINNUM,     ISD::FMAXNUM,      ISD::FADD,        ISD::FSUB,
         ISD::FMUL,        ISD::FMA,          ISD::FDIV,        ISD::FSQRT,
-        ISD::FABS,        ISD::FNEG,         ISD::FCOPYSIGN,   ISD::FCEIL,
+        ISD::FCEIL,
         ISD::FFLOOR,      ISD::FROUND,       ISD::FROUNDEVEN,  ISD::FRINT,
         ISD::FNEARBYINT,  ISD::IS_FPCLASS,   ISD::SETCC,       ISD::FMAXIMUM,
         ISD::FMINIMUM,    ISD::STRICT_FADD,  ISD::STRICT_FSUB, ISD::STRICT_FMUL,
@@ -1016,6 +1016,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         // load/store
         setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
 
+        setOperationAction(ISD::FNEG, VT, Expand);
+        setOperationAction(ISD::FABS, VT, Expand);
+        setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+
         // Custom split nxv32f16 since nxv32f32 if not legal.
         if (VT == MVT::nxv32f16) {
           setOperationAction(ZvfhminPromoteOps, VT, Custom);
@@ -1271,6 +1275,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
             // available.
             setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
           }
+          setOperationAction(ISD::FNEG, VT, Expand);
+          setOperationAction(ISD::FABS, VT, Expand);
+          setOperationAction(ISD::FCOPYSIGN, VT, Expand);
           MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
           // Don't promote f16 vector operations to f32 if f32 vector type is
           // not legal.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index fb9c0a57fd1bee..9ec4ed90720b95 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -514,176 +514,83 @@ define void @fneg_v8f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-LABEL: fneg_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = fneg <8 x half> %a
-  store <8 x half> %b, ptr %x
-  ret void
-}
-
-define void @fneg_v6f16(ptr %x) {
-; ZVFH-LABEL: fneg_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfneg.v v8, v8
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-RV32-LABEL: fneg_v6f16:
-; ZVFHMIN-RV32:       # %bb.0:
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfneg.v v8, v9
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT:    ret
-;
-; ZVFHMIN-RV64-LABEL: fneg_v6f16:
-; ZVFHMIN-RV64:       # %bb.0:
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfneg.v v8, v9
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = fneg <6 x half> %a
-  store <6 x half> %b, ptr %x
-  ret void
-}
-
-define void @fneg_v4f32(ptr %x) {
-; ZVFH-LABEL: fneg_v4f32:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; ZVFH-NEXT:    vle32.v v8, (a0)
-; ZVFH-NEXT:    vfneg.v v8, v8
-; ZVFH-NEXT:    vse32.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fneg_v4f32:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle32.v v8, (a0)
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
-; ZVFHMIN-NEXT:    vse32.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = fneg <4 x float> %a
-  store <4 x float> %b, ptr %x
-  ret void
-}
-
-define void @fneg_v2f64(ptr %x) {
-; CHECK-LABEL: fneg_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfneg.v v8, v8
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = fneg <2 x double> %a
-  store <2 x double> %b, ptr %x
-  ret void
-}
-
-define void @fabs_v8f16(ptr %x) {
-; ZVFH-LABEL: fabs_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfabs.v v8, v8
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fabs_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
-  store <8 x half> %b, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
-
-define void @fabs_v6f16(ptr %x) {
-; ZVFH-LABEL: fabs_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfabs.v v8, v8
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-ZFH-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV32-LABEL: fneg_v8f16:
 ; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 32
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT:    vfabs.v v8, v9
-; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 32
 ; ZVFHMIN-ZFH-RV32-NEXT:    ret
 ;
-; ZVFHMIN-ZFH-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV64-LABEL: fneg_v8f16:
 ; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 32
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT:    vfabs.v v8, v9
-; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMIN-ZFH-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 32
 ; ZVFHMIN-ZFH-RV64-NEXT:    ret
 ;
-; ZVFHMIN-ZFHIN-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFHIN-RV32-LABEL: fneg_v8f16:
 ; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -64
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 64
@@ -691,246 +598,1645 @@ define void @fabs_v6f16(ptr %x) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 20(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 21(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 58(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 54(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 52(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 50(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa2, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 48
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 46(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 44(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 42(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 40(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 40
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 64
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
 ;
-; ZVFHMIN-ZFHIN-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFHIN-RV64-LABEL: fneg_v8f16:
 ; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -80
-; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 96
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 24(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 74(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
+  %a = load <8 x half>, ptr %x
+  %b = fneg <8 x half> %a
+  store <8 x half> %b, ptr %x
+  ret void
+}
+
+define void @fneg_v6f16(ptr %x) {
+; ZVFH-LABEL: fneg_v6f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vfneg.v v8, v8
+; ZVFH-NEXT:    vse16.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV32-LABEL: fneg_v6f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -48
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa4, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa4, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa3, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa3, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa2, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa2, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 24
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fneg_v6f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fneg_v6f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -80
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 80
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 78(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 74(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 70(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 66(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa2, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 56
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fneg_v6f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 70(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 68(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 66(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 73(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 64
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 80
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse64.v v8, (a0)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a0, a0, 8
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 96
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
-  %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
+  %b = fneg <6 x half> %a
   store <6 x half> %b, ptr %x
   ret void
 }
-declare <6 x half> @llvm.fabs.v6f16(<6 x half>)
 
-define void @fabs_v4f32(ptr %x) {
-; ZVFH-LABEL: fabs_v4f32:
+define void @fneg_v4f32(ptr %x) {
+; ZVFH-LABEL: fneg_v4f32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; ZVFH-NEXT:    vle32.v v8, (a0)
-; ZVFH-NEXT:    vfabs.v v8, v8
+; ZVFH-NEXT:    vfneg.v v8, v8
 ; ZVFH-NEXT:    vse32.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-LABEL: fabs_v4f32:
+; ZVFHMIN-LABEL: fneg_v4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle32.v v8, (a0)
-; ZVFHMIN-NEXT:    vfabs.v v8, v8
+; ZVFHMIN-NEXT:    vfneg.v v8, v8
 ; ZVFHMIN-NEXT:    vse32.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <4 x float>, ptr %x
-  %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+  %b = fneg <4 x float> %a
   store <4 x float> %b, ptr %x
   ret void
 }
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
 
-define void @fabs_v2f64(ptr %x) {
-; CHECK-LABEL: fabs_v2f64:
+define void @fneg_v2f64(ptr %x) {
+; CHECK-LABEL: fneg_v2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfabs.v v8, v8
+; CHECK-NEXT:    vfneg.v v8, v8
 ; CHECK-NEXT:    vse64.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, ptr %x
-  %b = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+  %b = fneg <2 x double> %a
   store <2 x double> %b, ptr %x
   ret void
 }
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
 
-define void @copysign_v8f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: copysign_v8f16:
+define void @fabs_v8f16(ptr %x) {
+; ZVFH-LABEL: fabs_v8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: copysign_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v8, v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
-  store <8 x half> %c, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
-
-define void @copysign_v6f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: copysign_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
+; ZVFH-NEXT:    vfabs.v v8, v8
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-RV32-LABEL: copysign_v6f16:
-; ZVFHMIN-RV32:       # %bb.0:
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v8, v10
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: fabs_v8f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
 ;
-; ZVFHMIN-RV64-LABEL: copysign_v6f16:
-; ZVFHMIN-RV64:       # %bb.0:
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v8, v10
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    ret
+; ZVFHMIN-ZFH-RV64-LABEL: fabs_v8f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fabs_v8f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fabs_v8f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
+  %a = load <8 x half>, ptr %x
+  %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+  store <8 x half> %b, ptr %x
+  ret void
+}
+declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+
+define void @fabs_v6f16(ptr %x) {
+; ZVFH-LABEL: fabs_v6f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vfabs.v v8, v8
+; ZVFH-NEXT:    vse16.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -48
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa4, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa4, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa3, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa3, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fabs.h fa2, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa2, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 24
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa2, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 46(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 42(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 40
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 74(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 70(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 66(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 64
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
+  %a = load <6 x half>, ptr %x
+  %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
+  store <6 x half> %b, ptr %x
+  ret void
+}
+declare <6 x half> @llvm.fabs.v6f16(<6 x half>)
+
+define void @fabs_v4f32(ptr %x) {
+; ZVFH-LABEL: fabs_v4f32:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; ZVFH-NEXT:    vle32.v v8, (a0)
+; ZVFH-NEXT:    vfabs.v v8, v8
+; ZVFH-NEXT:    vse32.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: fabs_v4f32:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vle32.v v8, (a0)
+; ZVFHMIN-NEXT:    vfabs.v v8, v8
+; ZVFHMIN-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-NEXT:    ret
+  %a = load <4 x float>, ptr %x
+  %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+  store <4 x float> %b, ptr %x
+  ret void
+}
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+
+define void @fabs_v2f64(ptr %x) {
+; CHECK-LABEL: fabs_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfabs.v v8, v8
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x double>, ptr %x
+  %b = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+  store <2 x double> %b, ptr %x
+  ret void
+}
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+
+define void @copysign_v8f16(ptr %x, ptr %y) {
+; ZVFH-LABEL: copysign_v8f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vle16.v v9, (a1)
+; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
+; ZVFH-NEXT:    vse16.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -48
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -48
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 48
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -112
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 112
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 110(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 106(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 102(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 98(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 96
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 112
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -176
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 176
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 174(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 172(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 170(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 168(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 166(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 164(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 162(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 160
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 176
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
+  %a = load <8 x half>, ptr %x
+  %b = load <8 x half>, ptr %y
+  %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
+  store <8 x half> %c, ptr %x
+  ret void
+}
+declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
+
+define void @copysign_v6f16(ptr %x, ptr %y) {
+; ZVFH-LABEL: copysign_v6f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vle16.v v9, (a1)
+; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
+; ZVFH-NEXT:    vse16.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -64
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 62(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 58(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 54(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa4, 52(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa3, fa2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa3, 50(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa2, fa1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa2, 48(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa4, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa3, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa2, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 40
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 64
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -48
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 48
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 101(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 126(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 124(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 122(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 118(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 116(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 114(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa2, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 112
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 110(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 106(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 104
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -176
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 176
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 174(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 172(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 170(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 168(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 166(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 164(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 162(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 160
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 176
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
@@ -991,24 +2297,285 @@ define void @copysign_vf_v8f16(ptr %x, half %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-LABEL: copysign_vf_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v8, v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -160
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 160
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 158(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 156(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 154(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 150(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 148(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 146(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 144
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 160
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
   %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1026,52 +2593,315 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-RV32-LABEL: copysign_vf_v6f16:
-; ZVFHMIN-RV32:       # %bb.0:
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -48
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa4, fa4, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa4, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa3, fa3, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa3, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa2, fa2, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa2, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 24
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
 ;
-; ZVFHMIN-RV64-LABEL: copysign_vf_v6f16:
-; ZVFHMIN-RV64:       # %bb.0:
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    ret
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -112
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 112
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 110(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 106(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 102(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 98(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa2, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 96
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 90(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 88
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 112
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -160
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 160
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 158(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 156(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 154(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 150(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 148(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 146(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 144
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 160
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -1130,24 +2960,409 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-LABEL: copysign_neg_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v8, v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -48
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft2, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh ft1, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft0, ft1, ft0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh ft0, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa0, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa1, fa0, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa1, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa2, fa1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa2, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa3, fa2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa3, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa4, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -48
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft2, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh ft1, 46(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft0, ft1, ft0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh ft0, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa0, 42(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa1, fa0, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa1, 40(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa2, fa1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa2, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa3, fa2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa3, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa4, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 48
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -144
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 144
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa2, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa1, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa0, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh ft0, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh ft1, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh ft2, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh ft2, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh ft1, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh ft1, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh ft1, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh ft0, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh ft0, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh ft0, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa0, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa1, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa1, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa1, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa2, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 116(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 124(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 101(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 109(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 117(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 125(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 142(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 140(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 138(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 136(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 134(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 132(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 130(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 144
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -240
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 240
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa2, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa1, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa0, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh ft0, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh ft1, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh ft2, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh ft2, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh ft1, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh ft1, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh ft1, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh ft0, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh ft0, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh ft0, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa0, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa1, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa1, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa1, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa2, 168(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa2, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa2, 176(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa3, 184(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa3, 192(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa4, 200(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa4, 208(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 216(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 161(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 169(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 161(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 177(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 185(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 177(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 193(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 201(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 193(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 209(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 217(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 209(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 238(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 236(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 234(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 232(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 230(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 176(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 228(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 192(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 226(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 208(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 224(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 224
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 240
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
   %c = fneg <8 x half> %b
@@ -1166,52 +3381,439 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-RV32-LABEL: copysign_neg_v6f16:
-; ZVFHMIN-RV32:       # %bb.0:
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfneg.v v8, v10
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v8, v9
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -64
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft2, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh ft1, 62(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft0, ft1, ft0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh ft0, 60(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa0, 58(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa1, fa0, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa1, 56(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa2, fa1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa2, 54(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa3, fa1, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa3, 52(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa4, fa1, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa4, 50(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa2, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa3, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa4, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 40
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 64
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -48
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft2, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh ft1, 46(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft0, ft1, ft0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh ft0, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa0, 42(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa1, fa0, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa1, 40(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa2, fa1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa2, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa3, fa2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa3, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa4, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 48
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
 ;
-; ZVFHMIN-RV64-LABEL: copysign_neg_v6f16:
-; ZVFHMIN-RV64:       # %bb.0:
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfneg.v v8, v10
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v8, v9
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    ret
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -160
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 160
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa2, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa1, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa0, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh ft0, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh ft1, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh ft2, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh ft2, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh ft1, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh ft1, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh ft1, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh ft0, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh ft0, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh ft0, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa0, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa0, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa1, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa1, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa1, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa2, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 116(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 124(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 128(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 132(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 101(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 109(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 117(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 125(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 133(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 158(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 156(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 154(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 150(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 148(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 146(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa2, 128(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 144(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 144
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 142(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa4, 140(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa3, 138(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa2, 136(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 136
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 160
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -240
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 240
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa2, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa1, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa0, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh ft0, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh ft1, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh ft2, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh ft2, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh ft1, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh ft1, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh ft1, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh ft0, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh ft0, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh ft0, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa0, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa0, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa1, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa1, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa1, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa2, 168(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa2, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa2, 176(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa3, 184(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa3, 192(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa4, 200(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa4, 208(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 216(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 161(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 169(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 161(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 177(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 185(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 177(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 193(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 201(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 193(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 209(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 217(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 209(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 238(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 236(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 234(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 232(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 230(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 176(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 228(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 192(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 226(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 208(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 224(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 224
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 240
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = fneg <6 x half> %b
@@ -1274,25 +3876,245 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vle32.v v9, (a1)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    vle32.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 24
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    vle32.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 24
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -80
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 80
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle32.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 78(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 74(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 72
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle32.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 126(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 124(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 122(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 120
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <4 x half>, ptr %x
   %b = load <4 x float>, ptr %y
   %c = fneg <4 x float> %b
@@ -1316,69 +4138,278 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFHMIN-RV32:       # %bb.0:
-; ZVFHMIN-RV32-NEXT:    addi sp, sp, -16
-; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vle32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfneg.v v8, v9
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v10, v8
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT:    addi a1, sp, 8
-; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT:    fsh fa5, 4(a0)
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a0)
-; ZVFHMIN-RV32-NEXT:    addi sp, sp, 16
-; ZVFHMIN-RV32-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle32.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 24
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 4(a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -48
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a2, sp, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vse64.v v8, (a2)
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle32.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a2)
+; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 24
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 40
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 4(a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 48
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
 ;
-; ZVFHMIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFHMIN-RV64:       # %bb.0:
-; ZVFHMIN-RV64-NEXT:    addi sp, sp, -16
-; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vle64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    mv a2, sp
-; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a2)
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a2)
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vle32.v v9, (a1)
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfneg.v v8, v9
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v10, v8
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT:    addi a1, sp, 8
-; ZVFHMIN-RV64-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-RV64-NEXT:    fsh fa5, 4(a0)
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vse32.v v9, (a0)
-; ZVFHMIN-RV64-NEXT:    addi sp, sp, 16
-; ZVFHMIN-RV64-NEXT:    ret
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -80
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 80
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle32.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a2, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 70(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 66(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 72
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 4(a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -144
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 144
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse64.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle32.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 24
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a2, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 134(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 132(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 130(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 136
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 140(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 4(a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 144
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <3 x half>, ptr %x
   %b = load <3 x float>, ptr %y
   %c = fneg <3 x float> %b
@@ -1676,26 +4707,257 @@ define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vse16.v v10, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-LABEL: fmsub_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a2)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v11
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v10, v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: fmsub_v8f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fmsub_v8f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fmsub_v8f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fmsub_v8f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
   %c = load <8 x half>, ptr %z
@@ -1716,56 +4978,275 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vse16.v v10, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-RV32-LABEL: fmsub_v6f16:
-; ZVFHMIN-RV32:       # %bb.0:
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a2)
-; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfneg.v v8, v11
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v11
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfmacc.vv v10, v8, v9
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: fmsub_v6f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vse32.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fmsub_v6f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
 ;
-; ZVFHMIN-RV64-LABEL: fmsub_v6f16:
-; ZVFHMIN-RV64:       # %bb.0:
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a2)
-; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfneg.v v8, v11
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v11
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfmacc.vv v10, v8, v9
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    ret
+; ZVFHMIN-ZFHIN-RV32-LABEL: fmsub_v6f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse32.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fmsub_v6f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = load <6 x half>, ptr %z
@@ -2153,17 +5634,421 @@ define void @fneg_v16f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-LABEL: fneg_v16f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: fneg_v16f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -96
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFH-RV32-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
+; ZVFHMIN-ZFH-RV32-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_offset ra, -4
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_offset s0, -8
+; ZVFHMIN-ZFH-RV32-NEXT:    addi s0, sp, 96
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa s0, 0
+; ZVFHMIN-ZFH-RV32-NEXT:    andi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 62(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 58(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 54(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 50(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, s0, -96
+; ZVFHMIN-ZFH-RV32-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
+; ZVFHMIN-ZFH-RV32-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 96
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fneg_v16f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -96
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFH-RV64-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-ZFH-RV64-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-ZFH-RV64-NEXT:    addi s0, sp, 96
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa s0, 0
+; ZVFHMIN-ZFH-RV64-NEXT:    andi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 62(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 58(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 54(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 50(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, s0, -96
+; ZVFHMIN-ZFH-RV64-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-ZFH-RV64-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 96
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fneg_v16f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -160
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 160
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sw ra, 156(sp) # 4-byte Folded Spill
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sw s0, 152(sp) # 4-byte Folded Spill
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_offset ra, -4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_offset s0, -8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi s0, sp, 160
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa s0, 0
+; ZVFHMIN-ZFHIN-RV32-NEXT:    andi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 126(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 124(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 122(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 118(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 116(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 114(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 110(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 106(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 102(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 98(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 96
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, s0, -160
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lw ra, 156(sp) # 4-byte Folded Reload
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lw s0, 152(sp) # 4-byte Folded Reload
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 160
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fneg_v16f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -224
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 224
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi s0, sp, 224
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa s0, 0
+; ZVFHMIN-ZFHIN-RV64-NEXT:    andi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 190(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 188(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 186(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 184(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 182(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 180(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 178(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 176(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 174(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 172(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 170(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 168(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 166(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 164(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 162(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 160
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, s0, -224
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 224
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = fneg <16 x half> %a
   store <16 x half> %b, ptr %x
@@ -3727,31 +7612,281 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
 ; ZVFH-NEXT:    vse16.v v9, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-LABEL: fmsub_vf_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vle16.v v9, (a1)
-; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v9, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v11, v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v11
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: fmsub_vf_v8f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfmv.v.f v8, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v11, v8
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v9, v11
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfmacc.vv v11, v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fmsub_vf_v8f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfmv.v.f v8, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v11, v8
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v9, v11
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfmacc.vv v11, v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fmsub_vf_v8f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfmv.v.f v9, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfmacc.vv v11, v9, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fmsub_vf_v8f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfmv.v.f v9, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfmacc.vv v11, v9, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
   %c = insertelement <8 x half> poison, half %z, i32 0
@@ -3772,66 +7907,299 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
 ; ZVFH-NEXT:    vse16.v v9, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-RV32-LABEL: fmsub_vf_v6f16:
-; ZVFHMIN-RV32:       # %bb.0:
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfneg.v v9, v10
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v11, v10
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfmacc.vv v11, v9, v8
-; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v11
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT:    ret
+; ZVFHMIN-ZFH-RV32-LABEL: fmsub_vf_v6f16:
+; ZVFHMIN-ZFH-RV32:       # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfmv.v.f v8, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v11, v8
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v9, v11
+; ZVFHMIN-ZFH-RV32-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfmacc.vv v11, v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vse32.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT:    ret
 ;
-; ZVFHMIN-RV64-LABEL: fmsub_vf_v6f16:
-; ZVFHMIN-RV64:       # %bb.0:
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfneg.v v9, v10
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v11, v10
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfmacc.vv v11, v9, v8
-; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v11
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    ret
+; ZVFHMIN-ZFH-RV64-LABEL: fmsub_vf_v6f16:
+; ZVFHMIN-ZFH-RV64:       # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfmv.v.f v8, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v11, v8
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v9, v11
+; ZVFHMIN-ZFH-RV64-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfmacc.vv v11, v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fmsub_vf_v6f16:
+; ZVFHMIN-ZFHIN-RV32:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT:    sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfmv.v.f v9, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfmacc.vv v11, v9, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse32.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fmsub_vf_v6f16:
+; ZVFHMIN-ZFHIN-RV64:       # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT:    sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfmv.v.f v9, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfmacc.vv v11, v9, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = insertelement <6 x half> poison, half %z, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
index 95a410ea56b74a..4bf9ae16cdaf01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
@@ -19,12 +19,10 @@ define <vscale x 1 x half> @vfabs_nxv1f16(<vscale x 1 x half> %v) {
 ;
 ; ZVFHMIN-LABEL: vfabs_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v9, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half> %v)
   ret <vscale x 1 x half> %r
@@ -41,12 +39,10 @@ define <vscale x 2 x half> @vfabs_nxv2f16(<vscale x 2 x half> %v) {
 ;
 ; ZVFHMIN-LABEL: vfabs_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v9, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %v)
   ret <vscale x 2 x half> %r
@@ -63,12 +59,10 @@ define <vscale x 4 x half> @vfabs_nxv4f16(<vscale x 4 x half> %v) {
 ;
 ; ZVFHMIN-LABEL: vfabs_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v10, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %v)
   ret <vscale x 4 x half> %r
@@ -85,12 +79,10 @@ define <vscale x 8 x half> @vfabs_nxv8f16(<vscale x 8 x half> %v) {
 ;
 ; ZVFHMIN-LABEL: vfabs_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %v)
   ret <vscale x 8 x half> %r
@@ -107,12 +99,10 @@ define <vscale x 16 x half> @vfabs_nxv16f16(<vscale x 16 x half> %v) {
 ;
 ; ZVFHMIN-LABEL: vfabs_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> %v)
   ret <vscale x 16 x half> %r
@@ -129,17 +119,10 @@ define <vscale x 32 x half> @vfabs_nxv32f16(<vscale x 32 x half> %v) {
 ;
 ; ZVFHMIN-LABEL: vfabs_nxv32f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half> %v)
   ret <vscale x 32 x half> %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
index 029a121d08980c..c71c07488581a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
@@ -19,13 +19,12 @@ define <vscale x 1 x half> @vfcopysign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsca
 ;
 ; ZVFHMIN-LABEL: vfcopysign_vv_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v9, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %vs)
   ret <vscale x 1 x half> %r
@@ -45,12 +44,11 @@ define <vscale x 1 x half> @vfcopysign_vf_nxv1f16(<vscale x 1 x half> %vm, half
 ; ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v9, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -67,18 +65,13 @@ define <vscale x 1 x half> @vfcopynsign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsc
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_vv_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v9, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %n = fneg <vscale x 1 x half> %vs
   %r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %n)
@@ -99,17 +92,12 @@ define <vscale x 1 x half> @vfcopynsign_vf_nxv1f16(<vscale x 1 x half> %vm, half
 ; ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v9, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -130,12 +118,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1 x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vand.vx v9, v10, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %e = fptrunc <vscale x 1 x float> %vs to <vscale x 1 x half>
   %r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e)
@@ -158,12 +145,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1 x
 ; ZVFHMIN-NEXT:    vfmv.v.f v9, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v9, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x float> poison, float %s, i32 0
   %splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
@@ -182,19 +168,14 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %n = fneg <vscale x 1 x float> %vs
   %eneg = fptrunc <vscale x 1 x float> %n to <vscale x 1 x half>
@@ -216,19 +197,14 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfmv.v.f v9, fa0
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x float> poison, float %s, i32 0
   %splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
@@ -254,12 +230,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1 x
 ; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %e = fptrunc <vscale x 1 x double> %vs to <vscale x 1 x half>
   %r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e)
@@ -286,12 +261,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1 x
 ; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x double> poison, double %s, i32 0
   %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
@@ -312,22 +286,17 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %n = fneg <vscale x 1 x double> %vs
   %eneg = fptrunc <vscale x 1 x double> %n to <vscale x 1 x half>
@@ -351,22 +320,17 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfmv.v.f v9, fa0
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x double> poison, double %s, i32 0
   %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
@@ -387,13 +351,12 @@ define <vscale x 2 x half> @vfcopysign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsca
 ;
 ; ZVFHMIN-LABEL: vfcopysign_vv_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v9, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %vs)
   ret <vscale x 2 x half> %r
@@ -413,12 +376,11 @@ define <vscale x 2 x half> @vfcopysign_vf_nxv2f16(<vscale x 2 x half> %vm, half
 ; ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v9, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -435,18 +397,13 @@ define <vscale x 2 x half> @vfcopynsign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsc
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_vv_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v9, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %n = fneg <vscale x 2 x half> %vs
   %r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %n)
@@ -467,17 +424,12 @@ define <vscale x 2 x half> @vfcopynsign_vf_nxv2f16(<vscale x 2 x half> %vm, half
 ; ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v9, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -497,13 +449,12 @@ define <vscale x 4 x half> @vfcopysign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsca
 ;
 ; ZVFHMIN-LABEL: vfcopysign_vv_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %vs)
   ret <vscale x 4 x half> %r
@@ -523,12 +474,11 @@ define <vscale x 4 x half> @vfcopysign_vf_nxv4f16(<vscale x 4 x half> %vm, half
 ; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v10, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -545,18 +495,13 @@ define <vscale x 4 x half> @vfcopynsign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsc
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_vv_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v10, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v10, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %n = fneg <vscale x 4 x half> %vs
   %r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %n)
@@ -577,17 +522,12 @@ define <vscale x 4 x half> @vfcopynsign_vf_nxv4f16(<vscale x 4 x half> %vm, half
 ; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v10, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v10, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -607,13 +547,12 @@ define <vscale x 8 x half> @vfcopysign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsca
 ;
 ; ZVFHMIN-LABEL: vfcopysign_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %vs)
   ret <vscale x 8 x half> %r
@@ -633,12 +572,11 @@ define <vscale x 8 x half> @vfcopysign_vf_nxv8f16(<vscale x 8 x half> %vm, half
 ; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -655,18 +593,13 @@ define <vscale x 8 x half> @vfcopynsign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsc
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v12, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %n = fneg <vscale x 8 x half> %vs
   %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %n)
@@ -687,17 +620,12 @@ define <vscale x 8 x half> @vfcopynsign_vf_nxv8f16(<vscale x 8 x half> %vm, half
 ; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v12, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -718,12 +646,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8 x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %e = fptrunc <vscale x 8 x float> %vs to <vscale x 8 x half>
   %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e)
@@ -746,12 +673,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8 x
 ; ZVFHMIN-NEXT:    vfmv.v.f v12, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %s, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -770,19 +696,14 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %n = fneg <vscale x 8 x float> %vs
   %eneg = fptrunc <vscale x 8 x float> %n to <vscale x 8 x half>
@@ -804,19 +725,14 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmv.v.f v12, fa0
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %s, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -842,12 +758,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8 x
 ; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %e = fptrunc <vscale x 8 x double> %vs to <vscale x 8 x half>
   %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e)
@@ -874,12 +789,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8 x
 ; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %s, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -900,22 +814,17 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %n = fneg <vscale x 8 x double> %vs
   %eneg = fptrunc <vscale x 8 x double> %n to <vscale x 8 x half>
@@ -939,22 +848,17 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmv.v.f v16, fa0
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vand.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %s, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -975,13 +879,12 @@ define <vscale x 16 x half> @vfcopysign_vv_nxv16f16(<vscale x 16 x half> %vm, <v
 ;
 ; ZVFHMIN-LABEL: vfcopysign_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v12, v12, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %vs)
   ret <vscale x 16 x half> %r
@@ -1001,12 +904,11 @@ define <vscale x 16 x half> @vfcopysign_vf_nxv16f16(<vscale x 16 x half> %vm, ha
 ; ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v12, v12, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1023,18 +925,13 @@ define <vscale x 16 x half> @vfcopynsign_vv_nxv16f16(<vscale x 16 x half> %vm, <
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT:    vand.vx v12, v12, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %n = fneg <vscale x 16 x half> %vs
   %r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %n)
@@ -1055,17 +952,12 @@ define <vscale x 16 x half> @vfcopynsign_vf_nxv16f16(<vscale x 16 x half> %vm, h
 ; ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v12, v12, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1085,19 +977,12 @@ define <vscale x 32 x half> @vfcopysign_vv_nxv32f16(<vscale x 32 x half> %vm, <v
 ;
 ; ZVFHMIN-LABEL: vfcopysign_vv_nxv32f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v0, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v16, v16, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %vs)
   ret <vscale x 32 x half> %r
@@ -1117,17 +1002,13 @@ define <vscale x 32 x half> @vfcopysign_vf_nxv32f16(<vscale x 32 x half> %vm, ha
 ; ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    vmv.v.v v28, v24
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v16, v24, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -1144,29 +1025,13 @@ define <vscale x 32 x half> @vfcopynsign_vv_nxv32f16(<vscale x 32 x half> %vm, <
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_vv_nxv32f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v24, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v20, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v24, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v24, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v20
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v16, v16, a0
+; ZVFHMIN-NEXT:    vand.vx v16, v16, a0
+; ZVFHMIN-NEXT:    addi a0, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    ret
   %n = fneg <vscale x 32 x half> %vs
   %r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %n)
@@ -1187,22 +1052,14 @@ define <vscale x 32 x half> @vfcopynsign_vf_nxv32f16(<vscale x 32 x half> %vm, h
 ; ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    vmv.v.v v28, v24
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v16, v24, a0
+; ZVFHMIN-NEXT:    addi a1, a0, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vand.vx v16, v16, a0
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %s, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
index c835dc72268b32..725ac14b0e7a7e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
@@ -22,19 +22,16 @@ define <vscale x 1 x half> @vfmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ;
 ; ZVFHMIN-LABEL: vfmsub_vv_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v10, v11
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v12, v10, v9
+; ZVFHMIN-NEXT:    vfmadd.vv v9, v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 1 x half> %vc
   %vd = call <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -55,18 +52,15 @@ define <vscale x 1 x half> @vfmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v9, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -86,19 +80,16 @@ define <vscale x 2 x half> @vfmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ;
 ; ZVFHMIN-LABEL: vfmsub_vv_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v9, v11
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v12, v9, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v10, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 2 x half> %vb
   %vd = call <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vc, <vscale x 2 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -119,18 +110,15 @@ define <vscale x 2 x half> @vfmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v10, v8, v9
+; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -150,19 +138,16 @@ define <vscale x 4 x half> @vfmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ;
 ; ZVFHMIN-LABEL: vfmsub_vv_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v10, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v16, v10, v14
+; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 4 x half> %vc
   %vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x half> %va, <vscale x 4 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -183,16 +168,13 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v10, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v10, v14
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
@@ -214,19 +196,16 @@ define <vscale x 8 x half> @vfmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ;
 ; ZVFHMIN-LABEL: vfmsub_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v20, v16, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v12, v20, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 8 x half> %va
   %vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -247,16 +226,13 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v12, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v12, v20
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v20, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    ret
@@ -278,19 +254,30 @@ define <vscale x 16 x half> @vfmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ;
 ; ZVFHMIN-LABEL: vfmsub_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v24, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 2
+; ZVFHMIN-NEXT:    sub sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v16, v24, v0
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 2
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 16 x half> %vb
   %vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %vc, <vscale x 16 x half> %va, <vscale x 16 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -312,16 +299,13 @@ define <vscale x 16 x half> @vfmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v0
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    ret
@@ -351,58 +335,60 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    mul a1, a1, a2
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; ZVFHMIN-NEXT:    vmv8r.v v24, v16
+; ZVFHMIN-NEXT:    vmv8r.v v0, v16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vl8re16.v v16, (a0)
+; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT:    vmv4r.v v20, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v0, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v0, v8, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    li a1, 24
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
@@ -431,85 +417,78 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 28 * vlenb
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 12
+; ZVFHMIN-NEXT:    li a1, 20
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vmv8r.v v24, v8
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmv.v.f v0, fa5
+; ZVFHMIN-NEXT:    vfmv.v.f v24, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v20, v24
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs4r.v v20, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 12
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs4r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v0, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v0
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 20
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 2
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 20
+; ZVFHMIN-NEXT:    li a1, 12
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 2
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v0, v24, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 20
+; ZVFHMIN-NEXT:    li a1, 12
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl4r.v v16, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 12
+; ZVFHMIN-NEXT:    li a1, 20
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 20
+; ZVFHMIN-NEXT:    li a1, 12
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
+; ZVFHMIN-NEXT:    vfmadd.vv v0, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    li a1, 28
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
index 21b895b812354a..2991e52d4266a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
@@ -17,12 +17,9 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va) {
 ;
 ; ZVFHMIN-LABEL: vfneg_vv_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v9, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %vb = fneg <vscale x 1 x half> %va
   ret <vscale x 1 x half> %vb
@@ -37,12 +34,9 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va) {
 ;
 ; ZVFHMIN-LABEL: vfneg_vv_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v9, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %vb = fneg <vscale x 2 x half> %va
   ret <vscale x 2 x half> %vb
@@ -57,12 +51,9 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va) {
 ;
 ; ZVFHMIN-LABEL: vfneg_vv_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v10, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %vb = fneg <vscale x 4 x half> %va
   ret <vscale x 4 x half> %vb
@@ -77,12 +68,9 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va) {
 ;
 ; ZVFHMIN-LABEL: vfneg_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v12, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %vb = fneg <vscale x 8 x half> %va
   ret <vscale x 8 x half> %vb
@@ -97,12 +85,9 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va) {
 ;
 ; ZVFHMIN-LABEL: vfneg_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %vb = fneg <vscale x 16 x half> %va
   ret <vscale x 16 x half> %vb
@@ -117,17 +102,9 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va) {
 ;
 ; ZVFHMIN-LABEL: vfneg_vv_nxv32f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %vb = fneg <vscale x 32 x half> %va
   ret <vscale x 32 x half> %vb
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
index b54590cd9d8440..2f41b59d6b2253 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
@@ -22,24 +22,17 @@ define <vscale x 1 x half> @vfnmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v11
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v10, v8, v9
+; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 1 x half> %va
   %neg2 = fneg <vscale x 1 x half> %vc
@@ -61,23 +54,16 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v11, v9, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -98,24 +84,17 @@ define <vscale x 2 x half> @vfnmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v11
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v11
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 2 x half> %va
   %neg2 = fneg <vscale x 2 x half> %vb
@@ -137,23 +116,16 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -174,24 +146,17 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v12, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v10, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v14
+; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 4 x half> %vb
   %neg2 = fneg <vscale x 4 x half> %vc
@@ -213,23 +178,16 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v10, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v10, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v10, v12, a0
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v10, v14
+; ZVFHMIN-NEXT:    vfmadd.vv v10, v14, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -250,24 +208,17 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v20, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 8 x half> %vb
   %neg2 = fneg <vscale x 8 x half> %va
@@ -289,23 +240,16 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v12, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v12, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v12, v16, a0
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v12, v20
+; ZVFHMIN-NEXT:    vfmadd.vv v12, v20, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -326,25 +270,17 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv4r.v v4, v8
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v16, v16, a0
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v24
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 16 x half> %vc
   %neg2 = fneg <vscale x 16 x half> %vb
@@ -361,29 +297,21 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vsca
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv4r.v v28, v8
 ; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v16, v24, a0
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -408,92 +336,79 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 24
-; ZVFHMIN-NEXT:    mul a1, a1, a2
+; ZVFHMIN-NEXT:    slli a1, a1, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a0
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 4
+; ZVFHMIN-NEXT:    add a1, sp, a1
+; ZVFHMIN-NEXT:    addi a1, a1, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vxor.vx v0, v16, a0
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v0, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v24, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v0, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v8
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs4r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v20, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v28
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -515,80 +430,95 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 20
+; ZVFHMIN-NEXT:    li a1, 40
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 12
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
 ; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmv.v.f v24, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v0, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v0, v24
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 5
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vmv.v.v v4, v0
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v24, v0, a0
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 4
+; ZVFHMIN-NEXT:    add a1, sp, a1
+; ZVFHMIN-NEXT:    addi a1, a1, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 5
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v0, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 12
+; ZVFHMIN-NEXT:    li a1, 24
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 12
+; ZVFHMIN-NEXT:    li a1, 24
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 20
+; ZVFHMIN-NEXT:    li a1, 40
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
index 2f7e693a8a6f98..dc23b7dfbf1ee4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
@@ -22,19 +22,16 @@ define <vscale x 1 x half> @vfnmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v11
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v10, v11, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 1 x half> %va
   %vd = call <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half> %neg, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -55,18 +52,15 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v11, v9, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v9, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -86,19 +80,16 @@ define <vscale x 2 x half> @vfnmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v11
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 2 x half> %va
   %vd = call <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half> %neg, <vscale x 2 x half> %vc, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -119,18 +110,15 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v9, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -150,19 +138,16 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v12, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v10, v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 4 x half> %vb
   %vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %neg, <vscale x 4 x half> %va, <vscale x 4 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -183,18 +168,15 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v10, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v10
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v10, v12, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -214,19 +196,16 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v20, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 8 x half> %vb
   %vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %neg, <vscale x 8 x half> %vc, <vscale x 8 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -247,18 +226,15 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v12, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v12
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v12, v16, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v20, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -278,34 +254,16 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 2
-; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN-NEXT:    vmv4r.v v4, v12
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs4r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v16, v16, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl4r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 2
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 16 x half> %vc
   %vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %neg, <vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -321,38 +279,20 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vsca
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 2
-; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN-NEXT:    vmv4r.v v28, v12
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs4r.v v8, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v16, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl4r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vxor.vx v16, v24, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 2
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -376,77 +316,79 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 24
-; ZVFHMIN-NEXT:    mul a1, a1, a2
+; ZVFHMIN-NEXT:    slli a1, a1, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
+; ZVFHMIN-NEXT:    vmv8r.v v0, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v24, a0
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v0, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v24, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vmv4r.v v20, v12
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v8
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v8
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -467,73 +409,86 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmv.v.f v24, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfneg.v v24, v24
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v0, v24
+; ZVFHMIN-NEXT:    vmv8r.v v24, v16
+; ZVFHMIN-NEXT:    vmv8r.v v16, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vmv8r.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vmv.v.v v4, v0
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v0, v0, a0
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
-; ZVFHMIN-NEXT:    vmv.v.v v8, v4
+; ZVFHMIN-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v0
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v20
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret

>From 4d0842f5f9684cfad4727a917e629a4f254f546e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 29 Aug 2024 18:54:27 -0700
Subject: [PATCH 2/3] fixup! clang-format

---
 llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp |  9 ++++-----
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp         | 13 ++++++-------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index b551462831acef..1a874979006d43 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1698,8 +1698,7 @@ SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
   EVT IntVT = VT.changeVectorElementTypeToInteger();
 
   // FIXME: We shouldn't restrict this to scalable vectors.
-  if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) &&
-      VT.isScalableVector()) {
+  if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) && VT.isScalableVector()) {
     SDLoc DL(Node);
     SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
     SDValue ClearSignMask = DAG.getConstant(
@@ -1717,8 +1716,7 @@ SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
   // FIXME: We shouldn't restrict this to scalable vectors.
   if (VT == Node->getOperand(1).getValueType() &&
       TLI.isOperationLegalOrCustom(ISD::AND, IntVT) &&
-      TLI.isOperationLegalOrCustom(ISD::OR, IntVT) &&
-      VT.isScalableVector()) {
+      TLI.isOperationLegalOrCustom(ISD::OR, IntVT) && VT.isScalableVector()) {
     SDLoc DL(Node);
     SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
     SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
@@ -1734,7 +1732,8 @@ SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
     SDNodeFlags Flags;
     Flags.setDisjoint(true);
 
-    SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
+    SDValue CopiedSign =
+        DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
 
     return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
   }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cddd65f58baba8..b402089e485b4f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -882,13 +882,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
     // TODO: support more ops.
     static const unsigned ZvfhminPromoteOps[] = {
-        ISD::FMINNUM,     ISD::FMAXNUM,      ISD::FADD,        ISD::FSUB,
-        ISD::FMUL,        ISD::FMA,          ISD::FDIV,        ISD::FSQRT,
-        ISD::FCEIL,
-        ISD::FFLOOR,      ISD::FROUND,       ISD::FROUNDEVEN,  ISD::FRINT,
-        ISD::FNEARBYINT,  ISD::IS_FPCLASS,   ISD::SETCC,       ISD::FMAXIMUM,
-        ISD::FMINIMUM,    ISD::STRICT_FADD,  ISD::STRICT_FSUB, ISD::STRICT_FMUL,
-        ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
+        ISD::FMINNUM,     ISD::FMAXNUM,     ISD::FADD,         ISD::FSUB,
+        ISD::FMUL,        ISD::FMA,         ISD::FDIV,         ISD::FSQRT,
+        ISD::FCEIL,       ISD::FFLOOR,      ISD::FROUND,       ISD::FROUNDEVEN,
+        ISD::FRINT,       ISD::FNEARBYINT,  ISD::IS_FPCLASS,   ISD::SETCC,
+        ISD::FMAXIMUM,    ISD::FMINIMUM,    ISD::STRICT_FADD,  ISD::STRICT_FSUB,
+        ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
 
     // TODO: support more vp ops.
     static const unsigned ZvfhminPromoteVPOps[] = {

>From 395360eca801bfe993ae07d5a6ec2c69123f29b2 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 30 Aug 2024 11:43:26 -0700
Subject: [PATCH 3/3] fixup! Address review comment. Defer unrolling to
 VectorLegalizer::Expand.

---
 .../SelectionDAG/LegalizeVectorOps.cpp        | 70 ++++++++++---------
 1 file changed, 38 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 1a874979006d43..200a7be97f6dfa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -916,11 +916,17 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
     Results.push_back(ExpandFNEG(Node));
     return;
   case ISD::FABS:
-    Results.push_back(ExpandFABS(Node));
-    return;
+    if (SDValue Expanded = ExpandFABS(Node)) {
+      Results.push_back(Expanded);
+      return;
+    }
+    break;
   case ISD::FCOPYSIGN:
-    Results.push_back(ExpandFCOPYSIGN(Node));
-    return;
+    if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
+      Results.push_back(Expanded);
+      return;
+    }
+    break;
   case ISD::FSUB:
     ExpandFSUB(Node, Results);
     return;
@@ -1698,15 +1704,15 @@ SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
   EVT IntVT = VT.changeVectorElementTypeToInteger();
 
   // FIXME: We shouldn't restrict this to scalable vectors.
-  if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) && VT.isScalableVector()) {
-    SDLoc DL(Node);
-    SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
-    SDValue ClearSignMask = DAG.getConstant(
-        APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
-    SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
-    return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
-  }
-  return DAG.UnrollVectorOp(Node);
+  if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT) || !VT.isScalableVector())
+    return SDValue();
+
+  SDLoc DL(Node);
+  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+  SDValue ClearSignMask = DAG.getConstant(
+      APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+  SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
+  return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
 }
 
 SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
@@ -1714,30 +1720,30 @@ SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
   EVT IntVT = VT.changeVectorElementTypeToInteger();
 
   // FIXME: We shouldn't restrict this to scalable vectors.
-  if (VT == Node->getOperand(1).getValueType() &&
-      TLI.isOperationLegalOrCustom(ISD::AND, IntVT) &&
-      TLI.isOperationLegalOrCustom(ISD::OR, IntVT) && VT.isScalableVector()) {
-    SDLoc DL(Node);
-    SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
-    SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
+  if (VT != Node->getOperand(1).getValueType() ||
+      !TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
+      !TLI.isOperationLegalOrCustom(ISD::OR, IntVT) || !VT.isScalableVector())
+    return SDValue();
 
-    SDValue SignMask = DAG.getConstant(
-        APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
-    SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
+  SDLoc DL(Node);
+  SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+  SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
 
-    SDValue ClearSignMask = DAG.getConstant(
-        APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
-    SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
+  SDValue SignMask = DAG.getConstant(
+      APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+  SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
 
-    SDNodeFlags Flags;
-    Flags.setDisjoint(true);
+  SDValue ClearSignMask = DAG.getConstant(
+      APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+  SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
 
-    SDValue CopiedSign =
-        DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
+  SDNodeFlags Flags;
+  Flags.setDisjoint(true);
 
-    return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
-  }
-  return DAG.UnrollVectorOp(Node);
+  SDValue CopiedSign =
+      DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
+
+  return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
 }
 
 void VectorLegalizer::ExpandFSUB(SDNode *Node,

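A small postscript for readers comparing the ZVFHMIN check lines above: the updated sequences drop the widen/`vfneg.v`/narrow round trip in favor of integer logic on the raw f16 bits, with `lui a0, 8` materializing the 0x8000 sign mask that `vxor.vx` applies. Below is a minimal standalone C++ sketch of the same bit manipulation on a single half value; it is not part of the patch, the helper names (fneg_h, fabs_h, fcopysign_h) are made up for illustration, and it assumes an IEEE-754 binary16 pattern stored in a uint16_t.

  #include <cstdint>
  #include <cstdio>

  // Flip the sign bit: the vector expansion does the same with vxor.vx.
  uint16_t fneg_h(uint16_t X) { return X ^ 0x8000; }

  // Clear the sign bit: the ExpandFABS path ANDs with the signed-max mask (0x7FFF for i16).
  uint16_t fabs_h(uint16_t X) { return X & 0x7FFF; }

  // Copy the sign of Sign onto the magnitude of Mag, as in ExpandFCOPYSIGN:
  // isolate the sign bit, clear the destination sign, then OR them together.
  uint16_t fcopysign_h(uint16_t Mag, uint16_t Sign) {
    uint16_t SignBit = Sign & 0x8000;
    uint16_t Cleared = Mag & 0x7FFF;
    return Cleared | SignBit;
  }

  int main() {
    // 0x4200 is 3.0 and 0xC200 is -3.0 in binary16.
    std::printf("%#x %#x %#x\n", (unsigned)fneg_h(0x4200),
                (unsigned)fabs_h(0xC200),
                (unsigned)fcopysign_h(0x4200, 0xC200));
    return 0;
  }

Note that the OR in the copysign case can never see overlapping set bits, which is why the expansion in the patch marks it disjoint via SDNodeFlags.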

