[llvm] [LegalizeDAG][RISCV] Don't promote f16 vector ISD::FNEG/FABS/FCOPYSIGN to f32 when we don't have Zvfh. (PR #106652)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 3 20:53:50 PDT 2024
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/106652
>From 83975c9adf3181fed7e4a8050d83e6e17f836360 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 29 Aug 2024 18:37:06 -0700
Subject: [PATCH 1/3] [LegalizeDAG][RISCV] Don't promote f16 vector
ISD::FNEG/FABS/FCOPYSIGN to f32 when we don't have Zvfh.
The fp_extend will canonicalize NaNs, which is not the semantics of
FNEG/FABS/FCOPYSIGN.
For fixed vectors I'm scalarizing for now, because of test changes on other
targets where scalarization is the expected lowering. I will try to address this in a follow up.
For scalable vectors, we bitcast to integer and use integer logic ops.
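As an illustration (not part of the patch), the bit-level semantics the integer
expansion relies on can be sketched on a scalar f16 payload. The helper names
below are hypothetical and assume the IEEE 754 binary16 layout with the sign in
bit 15:

  #include <cstdint>
  #include <cstdio>

  // Scalar sketch of the integer expansion for f16 FNEG/FABS/FCOPYSIGN.
  // Pure bit operations leave NaN payloads untouched, unlike an
  // fp_extend/fp_round round trip, which may canonicalize them.
  static uint16_t fneg_f16(uint16_t X) { return X ^ 0x8000; }  // flip sign bit
  static uint16_t fabs_f16(uint16_t X) { return X & 0x7FFF; }  // clear sign bit
  static uint16_t fcopysign_f16(uint16_t Mag, uint16_t Sgn) {
    return (Mag & 0x7FFF) | (Sgn & 0x8000);                    // splice sign bit
  }

  int main() {
    uint16_t SNaN = 0x7D01; // signaling-NaN pattern; a float round trip would not preserve it
    printf("%04x %04x %04x\n", fneg_f16(SNaN), fabs_f16(SNaN),
           fcopysign_f16(SNaN, 0x8000));
    return 0;
  }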
---
.../SelectionDAG/LegalizeVectorOps.cpp | 58 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 +-
.../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 5756 +++++++++++++++--
llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll | 65 +-
.../CodeGen/RISCV/rvv/vfcopysign-sdnode.ll | 647 +-
.../RISCV/rvv/vfmsub-constrained-sdnode.ll | 275 +-
llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll | 59 +-
.../RISCV/rvv/vfnmadd-constrained-sdnode.ll | 424 +-
.../RISCV/rvv/vfnmsub-constrained-sdnode.ll | 341 +-
9 files changed, 5873 insertions(+), 1761 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2557fa288606e7..b551462831acef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -139,6 +139,8 @@ class VectorLegalizer {
std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
SDValue ExpandStore(SDNode *N);
SDValue ExpandFNEG(SDNode *Node);
+ SDValue ExpandFABS(SDNode *Node);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -913,6 +915,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::FNEG:
Results.push_back(ExpandFNEG(Node));
return;
+ case ISD::FABS:
+ Results.push_back(ExpandFABS(Node));
+ return;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ return;
case ISD::FSUB:
ExpandFSUB(Node, Results);
return;
@@ -1674,7 +1682,7 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
// FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) &&
- TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) {
+ (TLI.isOperationLegalOrCustom(ISD::FSUB, VT) || VT.isScalableVector())) {
SDLoc DL(Node);
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
SDValue SignMask = DAG.getConstant(
@@ -1685,6 +1693,54 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
return DAG.UnrollVectorOp(Node);
}
+SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+ // FIXME: We shouldn't restrict this to scalable vectors.
+ if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) &&
+ VT.isScalableVector()) {
+ SDLoc DL(Node);
+ SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue ClearSignMask = DAG.getConstant(
+ APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
+ }
+ return DAG.UnrollVectorOp(Node);
+}
+
+SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+ // FIXME: We shouldn't restrict this to scalable vectors.
+ if (VT == Node->getOperand(1).getValueType() &&
+ TLI.isOperationLegalOrCustom(ISD::AND, IntVT) &&
+ TLI.isOperationLegalOrCustom(ISD::OR, IntVT) &&
+ VT.isScalableVector()) {
+ SDLoc DL(Node);
+ SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
+
+ SDValue SignMask = DAG.getConstant(
+ APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
+
+ SDValue ClearSignMask = DAG.getConstant(
+ APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
+
+ SDNodeFlags Flags;
+ Flags.setDisjoint(true);
+
+ SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
+
+ return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
+ }
+ return DAG.UnrollVectorOp(Node);
+}
+
void VectorLegalizer::ExpandFSUB(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
// For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 09928dcc1f489a..cddd65f58baba8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -884,7 +884,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
static const unsigned ZvfhminPromoteOps[] = {
ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
+ ISD::FCEIL,
ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
@@ -1016,6 +1016,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// load/store
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+
// Custom split nxv32f16 since nxv32f32 if not legal.
if (VT == MVT::nxv32f16) {
setOperationAction(ZvfhminPromoteOps, VT, Custom);
@@ -1271,6 +1275,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// available.
setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
}
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
// Don't promote f16 vector operations to f32 if f32 vector type is
// not legal.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index fb9c0a57fd1bee..9ec4ed90720b95 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -514,176 +514,83 @@ define void @fneg_v8f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: fneg_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
- %a = load <8 x half>, ptr %x
- %b = fneg <8 x half> %a
- store <8 x half> %b, ptr %x
- ret void
-}
-
-define void @fneg_v6f16(ptr %x) {
-; ZVFH-LABEL: fneg_v6f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vfneg.v v8, v8
-; ZVFH-NEXT: vse16.v v8, (a0)
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-RV32-LABEL: fneg_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fneg_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
- %a = load <6 x half>, ptr %x
- %b = fneg <6 x half> %a
- store <6 x half> %b, ptr %x
- ret void
-}
-
-define void @fneg_v4f32(ptr %x) {
-; ZVFH-LABEL: fneg_v4f32:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; ZVFH-NEXT: vle32.v v8, (a0)
-; ZVFH-NEXT: vfneg.v v8, v8
-; ZVFH-NEXT: vse32.v v8, (a0)
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: fneg_v4f32:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vle32.v v8, (a0)
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-NEXT: ret
- %a = load <4 x float>, ptr %x
- %b = fneg <4 x float> %a
- store <4 x float> %b, ptr %x
- ret void
-}
-
-define void @fneg_v2f64(ptr %x) {
-; CHECK-LABEL: fneg_v2f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vfneg.v v8, v8
-; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: ret
- %a = load <2 x double>, ptr %x
- %b = fneg <2 x double> %a
- store <2 x double> %b, ptr %x
- ret void
-}
-
-define void @fabs_v8f16(ptr %x) {
-; ZVFH-LABEL: fabs_v8f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vfabs.v v8, v8
-; ZVFH-NEXT: vse16.v v8, (a0)
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: fabs_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
- %a = load <8 x half>, ptr %x
- %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
- store <8 x half> %b, ptr %x
- ret void
-}
-declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
-
-define void @fabs_v6f16(ptr %x) {
-; ZVFH-LABEL: fabs_v6f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vfabs.v v8, v8
-; ZVFH-NEXT: vse16.v v8, (a0)
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-ZFH-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV32-LABEL: fneg_v8f16:
; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vfabs.v v8, v9
-; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-ZFH-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFH-RV32-NEXT: ret
;
-; ZVFHMIN-ZFH-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV64-LABEL: fneg_v8f16:
; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vfabs.v v8, v9
-; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFH-RV64-NEXT: ret
;
-; ZVFHMIN-ZFHIN-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFHIN-RV32-LABEL: fneg_v8f16:
; ZVFHMIN-ZFHIN-RV32: # %bb.0:
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -64
; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 64
@@ -691,246 +598,1645 @@ define void @fabs_v6f16(ptr %x) {
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 58(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 54(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 52(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 50(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 48
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 46(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 44(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 42(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 40(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 40
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 64
; ZVFHMIN-ZFHIN-RV32-NEXT: ret
;
-; ZVFHMIN-ZFHIN-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFHIN-RV64-LABEL: fneg_v8f16:
; ZVFHMIN-ZFHIN-RV64: # %bb.0:
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -80
-; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 96
; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 74(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
+ %a = load <8 x half>, ptr %x
+ %b = fneg <8 x half> %a
+ store <8 x half> %b, ptr %x
+ ret void
+}
+
+define void @fneg_v6f16(ptr %x) {
+; ZVFH-LABEL: fneg_v6f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: vse16.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV32-LABEL: fneg_v6f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -48
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa4, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa3, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa2, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fneg_v6f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fneg_v6f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -80
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 78(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 74(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 70(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 66(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 56
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fneg_v6f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 70(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 68(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 66(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 73(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 64
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 80
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v8, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 96
; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
- %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
+ %b = fneg <6 x half> %a
store <6 x half> %b, ptr %x
ret void
}
-declare <6 x half> @llvm.fabs.v6f16(<6 x half>)
-define void @fabs_v4f32(ptr %x) {
-; ZVFH-LABEL: fabs_v4f32:
+define void @fneg_v4f32(ptr %x) {
+; ZVFH-LABEL: fneg_v4f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT: vle32.v v8, (a0)
-; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: vfneg.v v8, v8
; ZVFH-NEXT: vse32.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: fabs_v4f32:
+; ZVFHMIN-LABEL: fneg_v4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vle32.v v8, (a0)
-; ZVFHMIN-NEXT: vfabs.v v8, v8
+; ZVFHMIN-NEXT: vfneg.v v8, v8
; ZVFHMIN-NEXT: vse32.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <4 x float>, ptr %x
- %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %b = fneg <4 x float> %a
store <4 x float> %b, ptr %x
ret void
}
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
-define void @fabs_v2f64(ptr %x) {
-; CHECK-LABEL: fabs_v2f64:
+define void @fneg_v2f64(ptr %x) {
+; CHECK-LABEL: fneg_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x double>, ptr %x
- %b = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %b = fneg <2 x double> %a
store <2 x double> %b, ptr %x
ret void
}
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
-define void @copysign_v8f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: copysign_v8f16:
+define void @fabs_v8f16(ptr %x) {
+; ZVFH-LABEL: fabs_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vle16.v v9, (a1)
-; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
-; ZVFH-NEXT: vse16.v v8, (a0)
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: copysign_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
- %a = load <8 x half>, ptr %x
- %b = load <8 x half>, ptr %y
- %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
- store <8 x half> %c, ptr %x
- ret void
-}
-declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
-
-define void @copysign_v6f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: copysign_v6f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vle16.v v9, (a1)
-; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFH-NEXT: vfabs.v v8, v8
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: copysign_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v8, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: fabs_v8f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
;
-; ZVFHMIN-RV64-LABEL: copysign_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v8, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-ZFH-RV64-LABEL: fabs_v8f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fabs_v8f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fabs_v8f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
+ %a = load <8 x half>, ptr %x
+ %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ store <8 x half> %b, ptr %x
+ ret void
+}
+declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+
+define void @fabs_v6f16(ptr %x) {
+; ZVFH-LABEL: fabs_v6f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: vse16.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -48
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa2, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 46(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 42(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 40
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -80
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 74(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 70(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 66(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 64
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
+ %a = load <6 x half>, ptr %x
+ %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
+ store <6 x half> %b, ptr %x
+ ret void
+}
+declare <6 x half> @llvm.fabs.v6f16(<6 x half>)
+
+define void @fabs_v4f32(ptr %x) {
+; ZVFH-LABEL: fabs_v4f32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVFH-NEXT: vle32.v v8, (a0)
+; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: vse32.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fabs_v4f32:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vle32.v v8, (a0)
+; ZVFHMIN-NEXT: vfabs.v v8, v8
+; ZVFHMIN-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-NEXT: ret
+ %a = load <4 x float>, ptr %x
+ %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ store <4 x float> %b, ptr %x
+ ret void
+}
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+
+define void @fabs_v2f64(ptr %x) {
+; CHECK-LABEL: fabs_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <2 x double>, ptr %x
+ %b = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ store <2 x double> %b, ptr %x
+ ret void
+}
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+
+define void @copysign_v8f16(ptr %x, ptr %y) {
+; ZVFH-LABEL: copysign_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v9, (a1)
+; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFH-NEXT: vse16.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -48
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -48
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -112
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 112
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 110(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 106(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 102(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 98(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 96
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 112
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -176
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 176
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 174(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 172(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 170(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 168(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 166(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 164(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 162(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 160
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 176
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
+ %a = load <8 x half>, ptr %x
+ %b = load <8 x half>, ptr %y
+ %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
+ store <8 x half> %c, ptr %x
+ ret void
+}
+declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
+
+define void @copysign_v6f16(ptr %x, ptr %y) {
+; ZVFH-LABEL: copysign_v6f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v9, (a1)
+; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFH-NEXT: vse16.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -64
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 62(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 58(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 54(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 52(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa3, fa2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 50(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 48(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 40
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 64
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -48
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -128
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 101(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 126(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 124(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 122(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 118(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 116(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 114(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 112
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 110(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 106(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 104
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -176
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 176
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 174(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 172(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 170(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 168(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 166(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 164(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 162(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 160
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 176
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
@@ -991,24 +2297,285 @@ define void @copysign_vf_v8f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: copysign_vf_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -160
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 160
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 158(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 156(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 154(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 150(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 148(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 146(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 144
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 160
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <8 x half>, ptr %x
%b = insertelement <8 x half> poison, half %y, i32 0
%c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1026,52 +2593,315 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: copysign_vf_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -48
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa4, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa3, fa3, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa2, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
;
-; ZVFHMIN-RV64-LABEL: copysign_vf_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -112
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 112
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 110(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 106(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 102(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 98(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 96
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 90(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 88
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 112
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -160
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 160
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 158(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 156(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 154(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 150(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 148(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 146(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 144
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 160
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -1130,24 +2960,409 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: copysign_neg_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -48
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa0, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft0, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft1, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft2, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV32-NEXT: fsh ft1, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft1, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h ft0, ft1, ft0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh ft0, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft0, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa0, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa0, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa1, fa0, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa1, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa2, fa1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa3, fa2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -48
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa0, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft0, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft1, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft2, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV64-NEXT: fsh ft1, 46(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft1, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h ft0, ft1, ft0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh ft0, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft0, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa0, 42(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa0, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa1, fa0, fa1
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa1, 40(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa2, fa1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa2, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa3, fa2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa3, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -144
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 144
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa0, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh ft0, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh ft1, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh ft2, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh ft2, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh ft1, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh ft1, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh ft1, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh ft0, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh ft0, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh ft0, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa0, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa1, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa1, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 116(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 124(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 101(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 109(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 117(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 125(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 142(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 140(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 138(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 136(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 134(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 132(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 130(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 144
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -240
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 240
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa2, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa0, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh ft0, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh ft1, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh ft2, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh ft2, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh ft1, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh ft1, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh ft1, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh ft0, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh ft0, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh ft0, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa0, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa1, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa1, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa2, 168(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa2, 176(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa3, 184(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa3, 192(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 200(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 208(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 216(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 161(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 169(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 161(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 177(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 185(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 177(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 193(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 201(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 193(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 209(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 217(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 209(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 238(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 236(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 234(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 232(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 230(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 176(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 228(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 192(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 226(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 208(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 224(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 224
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 240
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
%c = fneg <8 x half> %b
@@ -1166,52 +3381,439 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: copysign_neg_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v8, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -64
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa0, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft0, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft1, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft2, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV32-NEXT: fsh ft1, 62(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft1, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h ft0, ft1, ft0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh ft0, 60(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft0, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa0, 58(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa0, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa1, fa0, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa1, 56(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa2, fa1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 54(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa3, fa1, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 52(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa4, fa1, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 50(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 48
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa2, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa3, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa4, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 40
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 64
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -48
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa0, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft0, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft1, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft2, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV64-NEXT: fsh ft1, 46(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft1, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h ft0, ft1, ft0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh ft0, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft0, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa0, 42(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa0, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa1, fa0, fa1
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa1, 40(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa2, fa1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa2, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa3, fa2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa3, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa4, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFH-RV64-NEXT: ret
;
-; ZVFHMIN-RV64-LABEL: copysign_neg_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v8, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -160
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 160
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa0, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh ft0, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh ft1, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh ft2, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh ft2, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh ft1, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh ft1, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh ft1, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh ft0, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh ft0, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh ft0, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa0, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa0, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa1, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa1, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 116(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 124(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 128(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 132(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 101(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 109(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 117(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 125(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 133(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 158(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 156(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 154(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 150(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 148(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 146(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 128(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 144(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 144
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 142(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 140(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 138(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 136(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 136
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 160
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -240
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 240
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa2, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa0, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh ft0, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh ft1, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh ft2, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh ft2, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh ft1, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh ft1, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh ft1, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh ft0, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh ft0, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh ft0, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa0, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa0, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa1, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa1, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa2, 168(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa2, 176(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa3, 184(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa3, 192(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 200(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa4, 208(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 216(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 161(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 169(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 161(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 177(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 185(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 177(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 193(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 201(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 193(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 209(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 217(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 209(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 238(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 236(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 234(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 232(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 230(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 176(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 228(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 192(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 226(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 208(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 224(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 224
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 240
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = fneg <6 x half> %b
@@ -1274,25 +3876,245 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vle32.v v9, (a1)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -80
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle32.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 78(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 74(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 72
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -128
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle32.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 126(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 124(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 122(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 120
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <4 x half>, ptr %x
%b = load <4 x float>, ptr %y
%c = fneg <4 x float> %b
@@ -1316,69 +4138,278 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v10, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: addi a1, sp, 8
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT: fsh fa5, 4(a0)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
-; ZVFHMIN-RV32-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 4(a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -48
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a2, sp, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v8, (a2)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle32.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a2)
+; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 40
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 4(a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 48
+; ZVFHMIN-ZFH-RV64-NEXT: ret
;
-; ZVFHMIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vle64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: mv a2, sp
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a2)
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle32.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v10, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: addi a1, sp, 8
-; ZVFHMIN-RV64-NEXT: vse16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV64-NEXT: fsh fa5, 4(a0)
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse32.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: addi sp, sp, 16
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -80
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle32.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a2, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 70(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 66(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 72
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 4(a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 80
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -144
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 144
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle32.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a2, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi a2, a2, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 134(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 132(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 130(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 136
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 140(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 4(a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 144
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <3 x half>, ptr %x
%b = load <3 x float>, ptr %y
%c = fneg <3 x float> %b
@@ -1676,26 +4707,257 @@ define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: fmsub_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: fmsub_v8f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a2)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fmsub_v8f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a2)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fmsub_v8f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fmsub_v8f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
%c = load <8 x half>, ptr %z
@@ -1716,56 +4978,275 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fmsub_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v8, v11
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v11
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmacc.vv v10, v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: fmsub_v6f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a2)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fmsub_v6f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a2)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
;
-; ZVFHMIN-RV64-LABEL: fmsub_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v8, v11
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v11
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmacc.vv v10, v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-ZFHIN-RV32-LABEL: fmsub_v6f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fmsub_v6f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfmacc.vv v10, v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = load <6 x half>, ptr %z
@@ -2153,17 +5634,421 @@ define void @fneg_v16f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: fneg_v16f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: fneg_v16f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -96
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFH-RV32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill
+; ZVFHMIN-ZFH-RV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_offset ra, -4
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_offset s0, -8
+; ZVFHMIN-ZFH-RV32-NEXT: addi s0, sp, 96
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN-ZFH-RV32-NEXT: andi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 62(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 58(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 54(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 50(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, s0, -96
+; ZVFHMIN-ZFH-RV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload
+; ZVFHMIN-ZFH-RV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 96
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fneg_v16f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -96
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFH-RV64-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-ZFH-RV64-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-ZFH-RV64-NEXT: addi s0, sp, 96
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN-ZFH-RV64-NEXT: andi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 62(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 58(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 54(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 50(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 46(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 42(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 38(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 34(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, s0, -96
+; ZVFHMIN-ZFH-RV64-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-ZFH-RV64-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 96
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fneg_v16f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -160
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 160
+; ZVFHMIN-ZFHIN-RV32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill
+; ZVFHMIN-ZFHIN-RV32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_offset ra, -4
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_offset s0, -8
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi s0, sp, 160
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN-ZFHIN-RV32-NEXT: andi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 53(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 61(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 69(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 77(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 85(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 93(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 126(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 124(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 122(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 118(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 116(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 114(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 110(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 68(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 108(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 106(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 76(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 102(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 100(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 98(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 96
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, s0, -160
+; ZVFHMIN-ZFHIN-RV32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
+; ZVFHMIN-ZFHIN-RV32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 160
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fneg_v16f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -224
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 224
+; ZVFHMIN-ZFHIN-RV64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill
+; ZVFHMIN-ZFHIN-RV64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi s0, sp, 224
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN-ZFHIN-RV64-NEXT: andi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 81(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 89(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 97(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 105(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 113(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 121(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 129(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 137(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 145(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 153(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 190(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 188(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 186(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 184(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 182(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 180(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 178(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 176(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 96(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 174(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 104(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 172(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 112(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 170(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 120(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 168(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 128(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 166(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 136(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 164(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 144(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 162(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 152(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 160(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 160
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, s0, -224
+; ZVFHMIN-ZFHIN-RV64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload
+; ZVFHMIN-ZFHIN-RV64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 224
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <16 x half>, ptr %x
%b = fneg <16 x half> %a
store <16 x half> %b, ptr %x
@@ -3727,31 +7612,281 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: fmsub_vf_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v11, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: fmsub_vf_v8f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfmv.v.f v8, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v11, v8
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v9, v11
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfmacc.vv v11, v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: fmsub_vf_v8f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfmv.v.f v8, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v11, v8
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v9, v11
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfmacc.vv v11, v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fmsub_vf_v8f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfmv.v.f v9, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fmsub_vf_v8f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfmv.v.f v9, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
%c = insertelement <8 x half> poison, half %z, i32 0
@@ -3772,66 +7907,299 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fmsub_vf_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmacc.vv v11, v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v11
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: fmsub_vf_v6f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfmv.v.f v8, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v11, v8
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v9, v11
+; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfmacc.vv v11, v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 2
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
;
-; ZVFHMIN-RV64-LABEL: fmsub_vf_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmacc.vv v11, v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v11
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-ZFH-RV64-LABEL: fmsub_vf_v6f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfmv.v.f v8, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v11, v8
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v9, v11
+; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfmacc.vv v11, v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: fmsub_vf_v6f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -64
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 58(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 54(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfmv.v.f v9, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 48
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v11, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 64
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: fmsub_vf_v6f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -96
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 65(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 73(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 94(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 92(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 90(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 88(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 86(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 84(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 64(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 82(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 72(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa0
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfmv.v.f v9, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 80(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 80
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v11, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 96
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = insertelement <6 x half> poison, half %z, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
index 95a410ea56b74a..4bf9ae16cdaf01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
@@ -19,12 +19,10 @@ define <vscale x 1 x half> @vfabs_nxv1f16(<vscale x 1 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half> %v)
ret <vscale x 1 x half> %r
@@ -41,12 +39,10 @@ define <vscale x 2 x half> @vfabs_nxv2f16(<vscale x 2 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %v)
ret <vscale x 2 x half> %r
@@ -63,12 +59,10 @@ define <vscale x 4 x half> @vfabs_nxv4f16(<vscale x 4 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %v)
ret <vscale x 4 x half> %r
@@ -85,12 +79,10 @@ define <vscale x 8 x half> @vfabs_nxv8f16(<vscale x 8 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %v)
ret <vscale x 8 x half> %r
@@ -107,12 +99,10 @@ define <vscale x 16 x half> @vfabs_nxv16f16(<vscale x 16 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> %v)
ret <vscale x 16 x half> %r
@@ -129,17 +119,10 @@ define <vscale x 32 x half> @vfabs_nxv32f16(<vscale x 32 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half> %v)
ret <vscale x 32 x half> %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
index 029a121d08980c..c71c07488581a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
@@ -19,13 +19,12 @@ define <vscale x 1 x half> @vfcopysign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsca
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %vs)
ret <vscale x 1 x half> %r
@@ -45,12 +44,11 @@ define <vscale x 1 x half> @vfcopysign_vf_nxv1f16(<vscale x 1 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -67,18 +65,13 @@ define <vscale x 1 x half> @vfcopynsign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsc
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 1 x half> %vs
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %n)
@@ -99,17 +92,12 @@ define <vscale x 1 x half> @vfcopynsign_vf_nxv1f16(<vscale x 1 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -130,12 +118,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1 x
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vand.vx v9, v10, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%e = fptrunc <vscale x 1 x float> %vs to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e)
@@ -158,12 +145,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1 x
; ZVFHMIN-NEXT: vfmv.v.f v9, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
@@ -182,19 +168,14 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1
;
; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 1 x float> %vs
%eneg = fptrunc <vscale x 1 x float> %n to <vscale x 1 x half>
@@ -216,19 +197,14 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v9, fa0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
@@ -254,12 +230,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1 x
; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%e = fptrunc <vscale x 1 x double> %vs to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e)
@@ -286,12 +261,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1 x
; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
@@ -312,22 +286,17 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1
;
; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
+; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 1 x double> %vs
%eneg = fptrunc <vscale x 1 x double> %n to <vscale x 1 x half>
@@ -351,22 +320,17 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v9, fa0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
+; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
@@ -387,13 +351,12 @@ define <vscale x 2 x half> @vfcopysign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsca
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %vs)
ret <vscale x 2 x half> %r
@@ -413,12 +376,11 @@ define <vscale x 2 x half> @vfcopysign_vf_nxv2f16(<vscale x 2 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -435,18 +397,13 @@ define <vscale x 2 x half> @vfcopynsign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsc
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 2 x half> %vs
%r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %n)
@@ -467,17 +424,12 @@ define <vscale x 2 x half> @vfcopynsign_vf_nxv2f16(<vscale x 2 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -497,13 +449,12 @@ define <vscale x 4 x half> @vfcopysign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsca
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %vs)
ret <vscale x 4 x half> %r
@@ -523,12 +474,11 @@ define <vscale x 4 x half> @vfcopysign_vf_nxv4f16(<vscale x 4 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -545,18 +495,13 @@ define <vscale x 4 x half> @vfcopynsign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsc
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 4 x half> %vs
%r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %n)
@@ -577,17 +522,12 @@ define <vscale x 4 x half> @vfcopynsign_vf_nxv4f16(<vscale x 4 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -607,13 +547,12 @@ define <vscale x 8 x half> @vfcopysign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsca
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %vs)
ret <vscale x 8 x half> %r
@@ -633,12 +572,11 @@ define <vscale x 8 x half> @vfcopysign_vf_nxv8f16(<vscale x 8 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -655,18 +593,13 @@ define <vscale x 8 x half> @vfcopynsign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsc
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 8 x half> %vs
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %n)
@@ -687,17 +620,12 @@ define <vscale x 8 x half> @vfcopynsign_vf_nxv8f16(<vscale x 8 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -718,12 +646,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8 x
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%e = fptrunc <vscale x 8 x float> %vs to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e)
@@ -746,12 +673,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8 x
; ZVFHMIN-NEXT: vfmv.v.f v12, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -770,19 +696,14 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8
;
; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 8 x float> %vs
%eneg = fptrunc <vscale x 8 x float> %n to <vscale x 8 x half>
@@ -804,19 +725,14 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v12, fa0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -842,12 +758,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8 x
; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%e = fptrunc <vscale x 8 x double> %vs to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e)
@@ -874,12 +789,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8 x
; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -900,22 +814,17 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8
;
; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v8
+; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 8 x double> %vs
%eneg = fptrunc <vscale x 8 x double> %n to <vscale x 8 x half>
@@ -939,22 +848,17 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v16, fa0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v8
+; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -975,13 +879,12 @@ define <vscale x 16 x half> @vfcopysign_vv_nxv16f16(<vscale x 16 x half> %vm, <v
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v12, v12, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
%r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %vs)
ret <vscale x 16 x half> %r
@@ -1001,12 +904,11 @@ define <vscale x 16 x half> @vfcopysign_vf_nxv16f16(<vscale x 16 x half> %vm, ha
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v12, v12, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1023,18 +925,13 @@ define <vscale x 16 x half> @vfcopynsign_vv_nxv16f16(<vscale x 16 x half> %vm, <
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT: vand.vx v12, v12, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 16 x half> %vs
%r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %n)
@@ -1055,17 +952,12 @@ define <vscale x 16 x half> @vfcopynsign_vf_nxv16f16(<vscale x 16 x half> %vm, h
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v12, v12, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1085,19 +977,12 @@ define <vscale x 32 x half> @vfcopysign_vv_nxv32f16(<vscale x 32 x half> %vm, <v
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v24, v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v16, v16, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
%r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %vs)
ret <vscale x 32 x half> %r
@@ -1117,17 +1002,13 @@ define <vscale x 32 x half> @vfcopysign_vf_nxv32f16(<vscale x 32 x half> %vm, ha
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vmv.v.v v28, v24
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v16, v24, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -1144,29 +1025,13 @@ define <vscale x 32 x half> @vfcopynsign_vv_nxv32f16(<vscale x 32 x half> %vm, <
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v24, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v24, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v24, v24, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0
+; ZVFHMIN-NEXT: vand.vx v16, v16, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 32 x half> %vs
%r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %n)
@@ -1187,22 +1052,14 @@ define <vscale x 32 x half> @vfcopynsign_vf_nxv32f16(<vscale x 32 x half> %vm, h
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vmv.v.v v28, v24
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v16, v24, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v16, v16, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
index c835dc72268b32..725ac14b0e7a7e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
@@ -22,19 +22,16 @@ define <vscale x 1 x half> @vfmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfmsub_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v12, v10, v9
+; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 1 x half> %vc
%vd = call <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -55,18 +52,15 @@ define <vscale x 1 x half> @vfmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -86,19 +80,16 @@ define <vscale x 2 x half> @vfmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfmsub_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v12, v9, v10
+; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 2 x half> %vb
%vd = call <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vc, <vscale x 2 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -119,18 +110,15 @@ define <vscale x 2 x half> @vfmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v8, v9
+; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -150,19 +138,16 @@ define <vscale x 4 x half> @vfmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfmsub_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v16, v10, v14
+; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 4 x half> %vc
%vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x half> %va, <vscale x 4 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -183,16 +168,13 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v10, v14
+; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
@@ -214,19 +196,16 @@ define <vscale x 8 x half> @vfmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfmsub_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v20, v16, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 8 x half> %va
%vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -247,16 +226,13 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v12, v20
+; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
@@ -278,19 +254,30 @@ define <vscale x 16 x half> @vfmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
;
; ZVFHMIN-LABEL: vfmsub_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v24, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v16, v24, v0
+; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 16 x half> %vb
%vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %vc, <vscale x 16 x half> %va, <vscale x 16 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -312,16 +299,13 @@ define <vscale x 16 x half> @vfmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vscal
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v0
+; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
@@ -351,58 +335,60 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: mul a1, a1, a2
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; ZVFHMIN-NEXT: vmv8r.v v24, v16
+; ZVFHMIN-NEXT: vmv8r.v v0, v16
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vl8re16.v v16, (a0)
+; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vmv4r.v v20, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v0, v8, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
+; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
@@ -431,85 +417,78 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: sub sp, sp, a0
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 28 * vlenb
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 12
+; ZVFHMIN-NEXT: li a1, 20
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v24, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v0, fa5
+; ZVFHMIN-NEXT: vfmv.v.f v24, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs4r.v v20, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 12
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v0
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
+; ZVFHMIN-NEXT: li a1, 12
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v0, v24, v16
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
+; ZVFHMIN-NEXT: li a1, 12
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl4r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 12
+; ZVFHMIN-NEXT: li a1, 20
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
+; ZVFHMIN-NEXT: li a1, 12
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 28
; ZVFHMIN-NEXT: mul a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
index 21b895b812354a..2991e52d4266a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
@@ -17,12 +17,9 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 1 x half> %va
ret <vscale x 1 x half> %vb
@@ -37,12 +34,9 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 2 x half> %va
ret <vscale x 2 x half> %vb
@@ -57,12 +51,9 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 4 x half> %va
ret <vscale x 4 x half> %vb
@@ -77,12 +68,9 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 8 x half> %va
ret <vscale x 8 x half> %vb
@@ -97,12 +85,9 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 16 x half> %va
ret <vscale x 16 x half> %vb
@@ -117,17 +102,9 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 32 x half> %va
ret <vscale x 32 x half> %vb
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
index b54590cd9d8440..2f41b59d6b2253 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
@@ -22,24 +22,17 @@ define <vscale x 1 x half> @vfnmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v8, v9
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 1 x half> %va
%neg2 = fneg <vscale x 1 x half> %vc
@@ -61,23 +54,16 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -98,24 +84,17 @@ define <vscale x 2 x half> @vfnmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v10
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 2 x half> %va
%neg2 = fneg <vscale x 2 x half> %vb
@@ -137,23 +116,16 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -174,24 +146,17 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v14
+; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 4 x half> %vb
%neg2 = fneg <vscale x 4 x half> %vc
@@ -213,23 +178,16 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v10, v12, a0
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v10, v14
+; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -250,24 +208,17 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 8 x half> %vb
%neg2 = fneg <vscale x 8 x half> %va
@@ -289,23 +240,16 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v12, v16, a0
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v12, v20
+; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -326,25 +270,17 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv4r.v v4, v8
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v24
+; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 16 x half> %vc
%neg2 = fneg <vscale x 16 x half> %vb
@@ -361,29 +297,21 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vsca
;
; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv4r.v v28, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v16, v24, a0
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
+; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -408,92 +336,79 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v24, v24, a0
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vxor.vx v0, v16, a0
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v8
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
+; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v28
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8
+; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -515,80 +430,95 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
+; ZVFHMIN-NEXT: li a1, 40
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 12
-; ZVFHMIN-NEXT: mul a0, a0, a1
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v24, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v24
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 5
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.v v4, v0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v24, v0, a0
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vxor.vx v16, v8, a0
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 5
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 12
+; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 12
+; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
+; ZVFHMIN-NEXT: li a1, 40
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
index 2f7e693a8a6f98..dc23b7dfbf1ee4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
@@ -22,19 +22,16 @@ define <vscale x 1 x half> @vfnmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v8
+; ZVFHMIN-NEXT: vfmadd.vv v10, v11, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 1 x half> %va
%vd = call <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half> %neg, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -55,18 +52,15 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -86,19 +80,16 @@ define <vscale x 2 x half> @vfnmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v9, v11, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 2 x half> %va
%vd = call <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half> %neg, <vscale x 2 x half> %vc, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -119,18 +110,15 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -150,19 +138,16 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT: vfmadd.vv v10, v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 4 x half> %vb
%vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %neg, <vscale x 4 x half> %va, <vscale x 4 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -183,18 +168,15 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v10, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v10
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -214,19 +196,16 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 8 x half> %vb
%vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %neg, <vscale x 8 x half> %vc, <vscale x 8 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -247,18 +226,15 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v12, v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v12
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -278,34 +254,16 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN-NEXT: vmv4r.v v4, v12
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v8
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 16 x half> %vc
%vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %neg, <vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -321,38 +279,20 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vsca
;
; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN-NEXT: vmv4r.v v28, v12
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v16, v24, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v8
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -376,77 +316,79 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
+; ZVFHMIN-NEXT: vmv8r.v v0, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v24, a0
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vmv4r.v v20, v12
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v8
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -467,73 +409,86 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v24, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v24, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v24
+; ZVFHMIN-NEXT: vmv8r.v v24, v16
+; ZVFHMIN-NEXT: vmv8r.v v16, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.v v4, v0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v0, v0, a0
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
-; ZVFHMIN-NEXT: vmv.v.v v8, v4
+; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv8r.v v8, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v0
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v8
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
>From 4d0842f5f9684cfad4727a917e629a4f254f546e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 29 Aug 2024 18:54:27 -0700
Subject: [PATCH 2/3] fixup! clang-format
---
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 9 ++++-----
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 13 ++++++-------
2 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index b551462831acef..1a874979006d43 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1698,8 +1698,7 @@ SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
EVT IntVT = VT.changeVectorElementTypeToInteger();
// FIXME: We shouldn't restrict this to scalable vectors.
- if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) &&
- VT.isScalableVector()) {
+ if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) && VT.isScalableVector()) {
SDLoc DL(Node);
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
SDValue ClearSignMask = DAG.getConstant(
@@ -1717,8 +1716,7 @@ SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
// FIXME: We shouldn't restrict this to scalable vectors.
if (VT == Node->getOperand(1).getValueType() &&
TLI.isOperationLegalOrCustom(ISD::AND, IntVT) &&
- TLI.isOperationLegalOrCustom(ISD::OR, IntVT) &&
- VT.isScalableVector()) {
+ TLI.isOperationLegalOrCustom(ISD::OR, IntVT) && VT.isScalableVector()) {
SDLoc DL(Node);
SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
@@ -1734,7 +1732,8 @@ SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
SDNodeFlags Flags;
Flags.setDisjoint(true);
- SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
+ SDValue CopiedSign =
+ DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cddd65f58baba8..b402089e485b4f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -882,13 +882,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: support more ops.
static const unsigned ZvfhminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FCEIL,
- ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
- ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
- ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
- ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
+ ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN,
+ ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC,
+ ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
+ ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
// TODO: support more vp ops.
static const unsigned ZvfhminPromoteVPOps[] = {
>From 395360eca801bfe993ae07d5a6ec2c69123f29b2 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 30 Aug 2024 11:43:26 -0700
Subject: [PATCH 3/3] fixup! Address review comment. Defer unrolling to
VectorLegalizer::Expand.
---
.../SelectionDAG/LegalizeVectorOps.cpp | 70 ++++++++++---------
1 file changed, 38 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 1a874979006d43..200a7be97f6dfa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -916,11 +916,17 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
Results.push_back(ExpandFNEG(Node));
return;
case ISD::FABS:
- Results.push_back(ExpandFABS(Node));
- return;
+ if (SDValue Expanded = ExpandFABS(Node)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::FCOPYSIGN:
- Results.push_back(ExpandFCOPYSIGN(Node));
- return;
+ if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::FSUB:
ExpandFSUB(Node, Results);
return;
@@ -1698,15 +1704,15 @@ SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
EVT IntVT = VT.changeVectorElementTypeToInteger();
// FIXME: We shouldn't restrict this to scalable vectors.
- if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) && VT.isScalableVector()) {
- SDLoc DL(Node);
- SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
- SDValue ClearSignMask = DAG.getConstant(
- APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
- SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
- return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
- }
- return DAG.UnrollVectorOp(Node);
+ if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT) || !VT.isScalableVector())
+ return SDValue();
+
+ SDLoc DL(Node);
+ SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue ClearSignMask = DAG.getConstant(
+ APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
}
SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
@@ -1714,30 +1720,30 @@ SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
EVT IntVT = VT.changeVectorElementTypeToInteger();
// FIXME: We shouldn't restrict this to scalable vectors.
- if (VT == Node->getOperand(1).getValueType() &&
- TLI.isOperationLegalOrCustom(ISD::AND, IntVT) &&
- TLI.isOperationLegalOrCustom(ISD::OR, IntVT) && VT.isScalableVector()) {
- SDLoc DL(Node);
- SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
- SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
+ if (VT != Node->getOperand(1).getValueType() ||
+ !TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
+ !TLI.isOperationLegalOrCustom(ISD::OR, IntVT) || !VT.isScalableVector())
+ return SDValue();
- SDValue SignMask = DAG.getConstant(
- APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
- SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
+ SDLoc DL(Node);
+ SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
- SDValue ClearSignMask = DAG.getConstant(
- APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
- SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
+ SDValue SignMask = DAG.getConstant(
+ APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
- SDNodeFlags Flags;
- Flags.setDisjoint(true);
+ SDValue ClearSignMask = DAG.getConstant(
+ APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
- SDValue CopiedSign =
- DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
+ SDNodeFlags Flags;
+ Flags.setDisjoint(true);
- return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
- }
- return DAG.UnrollVectorOp(Node);
+ SDValue CopiedSign =
+ DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
+
+ return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
}
void VectorLegalizer::ExpandFSUB(SDNode *Node,
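
For readers skimming the diffs above, here is a minimal scalar sketch of the bit manipulation the new expansion performs; the function names and the use of uint16_t are illustrative only (the patch itself operates on SDNodes over whole vectors), but the masks match what the code computes with APInt::getSignMask / APInt::getSignedMaxValue, and 0x8000 is the constant the ZVFHMIN output materializes with `lui a0, 8`. Because only integer bit operations are involved, NaN payloads pass through unchanged, which is the semantic point of avoiding the f16-to-f32 promotion.

```cpp
// Scalar analogue of the integer-mask expansion for f16 FNEG/FABS/FCOPYSIGN.
#include <cstdint>
#include <cstdio>

// IEEE-754 binary16: 1 sign bit, 5 exponent bits, 10 mantissa bits.
constexpr uint16_t SignMask = 0x8000;      // APInt::getSignMask(16)
constexpr uint16_t ClearSignMask = 0x7FFF; // APInt::getSignedMaxValue(16)

uint16_t fneg_f16_bits(uint16_t X) { return X ^ SignMask; }      // flip the sign bit
uint16_t fabs_f16_bits(uint16_t X) { return X & ClearSignMask; } // clear the sign bit

uint16_t fcopysign_f16_bits(uint16_t Mag, uint16_t Sign) {
  // (Mag & 0x7FFF) | (Sign & 0x8000); the two operands of the OR have no
  // common set bits, which is why the patch marks the OR as disjoint.
  return (Mag & ClearSignMask) | (Sign & SignMask);
}

int main() {
  uint16_t SNaN = 0x7D00; // signaling-NaN payload that an fp_extend would canonicalize
  std::printf("fneg:     %04x\n", fneg_f16_bits(SNaN));                    // 0xfd00
  std::printf("fabs:     %04x\n", fabs_f16_bits(0xFD00));                  // 0x7d00
  std::printf("copysign: %04x\n", fcopysign_f16_bits(0x3C00, 0x8000));     // 0xbc00 (-1.0)
}
```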
More information about the llvm-commits mailing list